diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/README.txt lucene_solr_4_1-org/lucene/analysis/README.txt *** lucene-solr_4_1.bak/lucene/analysis/README.txt Wed Feb 27 22:05:49 2013 --- lucene_solr_4_1-org/lucene/analysis/README.txt Wed Feb 27 11:29:43 2013 *************** *** 28,33 **** --- 28,36 ---- lucene-analyzers-morfologik-XX.jar An analyzer using the Morfologik stemming library. + lucene-analyzers-opennlp-XX.jar + An analyzer using the OpenNLP natural-language processing library. + lucene-analyzers-phonetic-XX.jar An add-on analysis library that provides phonetic encoders via Apache Commons-Codec. Note: this module depends on the commons-codec jar *************** *** 49,54 **** --- 52,58 ---- icu/src/java kuromoji/src/java morfologik/src/java + opennlp/src/java phonetic/src/java smartcn/src/java stempel/src/java *************** *** 59,64 **** --- 63,69 ---- icu/src/test kuromoji/src/test morfologik/src/test + opennlp/src/test phonetic/src/test smartcn/src/test stempel/src/test diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/build.xml lucene_solr_4_1-org/lucene/analysis/build.xml *** lucene-solr_4_1.bak/lucene/analysis/build.xml Wed Feb 27 22:05:49 2013 --- lucene_solr_4_1-org/lucene/analysis/build.xml Wed Feb 27 11:29:43 2013 *************** *** 21,33 **** Additional Analyzers ! - common: Additional Analyzers ! - icu: Analyzers that use functionality from ICU ! - kuromoji: Japanese Morphological Analyzer - morfologik: Morfologik Stemmer - smartcn: Smart Analyzer for Simplified Chinese Text - stempel: Algorithmic Stemmer for Polish ! - uima: UIMA Analysis module --- 21,35 ---- Additional Analyzers ! - common: Additional Analyzers ! - icu: Analyzers that use functionality from ICU ! 
- kuromoji: Japanese Morphological Analyzer - morfologik: Morfologik Stemmer + - opennlp: OpenNLP Natural Language Processing package + - phonetic: Phoneme-based analyzers - smartcn: Smart Analyzer for Simplified Chinese Text - stempel: Algorithmic Stemmer for Polish ! - uima: UIMA Analysis module *************** *** 64,69 **** --- 66,75 ---- + + + + *************** *** 81,87 **** ! --- 87,93 ---- ! diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/build.xml lucene_solr_4_1-org/lucene/analysis/opennlp/build.xml *** lucene-solr_4_1.bak/lucene/analysis/opennlp/build.xml Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/build.xml Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,45 ---- + + + + + + + + OpenNLP Library Integration + + + + + + + + + + + + + + + + + + + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/ivy.xml lucene_solr_4_1-org/lucene/analysis/opennlp/ivy.xml *** lucene-solr_4_1.bak/lucene/analysis/opennlp/ivy.xml Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/ivy.xml Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,35 ---- + + + + + + + + + + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/FilterPayloadsFilter.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/FilterPayloadsFilter.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/FilterPayloadsFilter.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/FilterPayloadsFilter.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,71 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.lucene.analysis.opennlp; + + import java.io.IOException; + import java.util.Arrays; + + import org.apache.lucene.analysis.util.FilteringTokenFilter; + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; + import org.apache.lucene.util.BytesRef; + + /** + * Screen tokens for payload contents. Keep all/drop all tokens with a payload in the list. + * + * TODO: Change to subclass from FilteringTokenFilter as it handles offset stuff. + * TODO: Add offset checking to unit test. 
+ */ + + public final class FilterPayloadsFilter extends FilteringTokenFilter { + + final private byte[][] payloads; + private final boolean keep; + private PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + + public FilterPayloadsFilter(boolean enablePositionIncrements, TokenStream input, byte[][] payloads, boolean keep) { + super(enablePositionIncrements, input); + this.payloads = payloads; + this.keep = keep; + } + + @Override + protected boolean accept() throws IOException { + BytesRef p = payloadAtt.getPayload(); + if (p == null && keep) { + return false; + } else if (p == null && !keep) { + return true; + } else { + byte[] key = Arrays.copyOfRange(p.bytes, p.offset, p.offset + p.length); + int n = 0; + while(n < payloads.length) { + if (Arrays.equals(key, payloads[n])) { + break; + } + n++; + } + if ((n < payloads.length) == keep) { + return true; + } else{ + return false; + } + } + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPFilter.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPFilter.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPFilter.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPFilter.java Wed Feb 27 14:19:47 2013 *************** *** 0 **** --- 1,238 ---- + package org.apache.lucene.analysis.opennlp; + + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + import java.io.IOException; + import java.io.UnsupportedEncodingException; + import java.util.ArrayList; + import java.util.Arrays; + import java.util.Iterator; + import java.util.List; + import java.util.Locale; + + import opennlp.tools.util.Span; + + import org.apache.lucene.analysis.TokenFilter; + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.opennlp.tools.NLPChunkerOp; + import org.apache.lucene.analysis.opennlp.tools.NLPNERTaggerOp; + import org.apache.lucene.analysis.opennlp.tools.NLPPOSTaggerOp; + import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; + import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; + import org.apache.lucene.util.Attribute; + import org.apache.lucene.util.AttributeSource; + import org.apache.lucene.util.BytesRef; + + /** + * Run OpenNLP sentence-processing tools + * OpenNLP Tokenizer- removed sentence detection + * Optional: POS tagger or phrase chunker. These tag all terms. + * Optional: one or more Named Entity Resolution taggers. These tag only some terms. + * + * Use file names as keys for cached models. 
+ * + * TODO: a) do positionincr attrs b) implement all attr types + * + * Hacks: + * hack #1: EN POS tagger sometimes tags last word as a period if no period at the end + * hack #2: tokenizer needs to split words with punctuation and it doesn't + */ + public final class OpenNLPFilter extends TokenFilter { + + // TODO: if there's an ICU for this, that's great + private static String SENTENCE_BREAK = "[.?!]"; + + private final boolean doPOS; + private final boolean doChunking; + private final boolean doNER; + + private int finalOffset; + + // cloned attrs of all tokens + private List tokenAttrs = new ArrayList(); + boolean first = true; + int indexToken = 0; + // private char[] fullText; + // hack #1: have to remove final term if we added one + private boolean stripFinal = false; + + private NLPPOSTaggerOp posTaggerOp = null; + private NLPChunkerOp chunkerOp = null; + private List nerTaggerOps = new ArrayList(); + + + public OpenNLPFilter( + TokenStream input, + NLPPOSTaggerOp posTaggerOp, + NLPChunkerOp chunkerOp, + ArrayList nerTaggerOps) throws IOException { + super(input); + this.posTaggerOp = posTaggerOp; + this.chunkerOp = chunkerOp; + this.nerTaggerOps = nerTaggerOps; + boolean havePOS = (posTaggerOp != null); + doChunking = (chunkerOp != null); + doPOS = doChunking ? false : havePOS; + doNER = (nerTaggerOps != null); + } + + @Override + public final boolean incrementToken() throws IOException { + clearAttributes(); + if (first) { + String[] words = walkTokens(); + if (words.length == 0) { + return false; + } + createTags(words); + first = false; + indexToken = 0; + } + if (stripFinal && indexToken == tokenAttrs.size() - 1) { + return false; + } + if (indexToken == tokenAttrs.size()) { + return false; + } + AttributeSource as = tokenAttrs.get(indexToken); + Iterator> it = as.getAttributeClassesIterator(); + while(it.hasNext()) { + Class attrClass = it.next(); + if (! 
hasAttribute(attrClass)) { + addAttribute(attrClass); + } + } + as.copyTo(this); + indexToken++; + return true; + } + + private String[] walkTokens() throws IOException { + List wordList = new ArrayList(); + while (input.incrementToken()) { + CharTermAttribute textAtt = input.getAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = input.getAttribute(OffsetAttribute.class); + char[] buffer = textAtt.buffer(); + String word = new String(buffer, 0, offsetAtt.endOffset() - offsetAtt.startOffset()); + wordList.add(word); + AttributeSource attrs = input.cloneAttributes(); + tokenAttrs.add(attrs); + } + String[] words = new String[wordList.size()]; + for(int i = 0; i < words.length; i++) { + words[i] = wordList.get(i); + } + return words; + } + + private void createTags(String[] words) { + String[] appended = appendDot(words); + if (doPOS) { + String[] tags = assignPOS(appended); + appendPayloads(tags, words.length); + } + else if (doChunking) { + String[] pos = assignPOS(appended); + String[] tags = createChunks(words, pos); + appendPayloads(tags, words.length); + } + if (doNER) { + for(NLPNERTaggerOp op: nerTaggerOps) { + String[] tags = createAllNER(op, appended); + appendPayloads(tags, words.length); + } + } + } + + // Hack #1: taggers expect a sentence break as the final term. + // This does not make it into the attribute set lists. + private String[] appendDot(String[] words) { + int nWords = words.length; + String lastWord = words[nWords - 1]; + if (lastWord.length() != 1) { + return words; + } + if (lastWord.matches(SENTENCE_BREAK)) { + return words; + } + words = Arrays.copyOf(words, nWords + 1); + words[nWords] = "."; + return words; + } + + private void appendPayloads(String[] tags, int length) { + for(int i = 0; i < length; i++) { + AttributeSource attrs = tokenAttrs.get(i); + if (tags[i] != null) { + try { + PayloadAttribute payloadAtt = attrs.hasAttribute(PayloadAttribute.class) ? 
attrs.getAttribute(PayloadAttribute.class) : attrs.addAttribute(PayloadAttribute.class); + BytesRef p = new BytesRef(tags[i].toUpperCase(Locale.getDefault()).getBytes("UTF-8")); + payloadAtt.setPayload(p); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + } + } + + private String[] assignPOS(String[] words) { + return posTaggerOp.getPOSTags(words); + } + + private String[] createChunks(String[] words, String[] pos) { + return chunkerOp.getChunks(words, pos, null); + } + + private String[] createAllNER(NLPNERTaggerOp nerTagger, String[] words) { + Span[] nerSpans = nerTagger.getNames(words); + String[] nerTags = new String[words.length]; + if (nerSpans.length == 0) { + return nerTags; + } + String tag = nerSpans[0].getType(); + for(int i = 0; i < nerSpans.length; i++) { + Span tagged = nerSpans[i]; + for(int j = tagged.getStart(); j < tagged.getEnd(); j++) { + nerTags[j] = tag; + } + } + return nerTags; + } + + @Override + public final void end() { + clearAttributes(); + OffsetAttribute offsetAtt = getAttribute(OffsetAttribute.class); + offsetAtt.setOffset(finalOffset, finalOffset); + tokenAttrs.clear(); + } + + @Override + public void reset() throws IOException { + super.reset(); + clearAttributes(); + restartAtBeginning(); + } + + private void restartAtBeginning() throws IOException { + indexToken = 0; + finalOffset = 0; + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java Wed Feb 27 14:19:17 2013 *************** *** 0 **** --- 1,182 ---- + package 
org.apache.lucene.analysis.opennlp; + + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + import java.io.IOException; + import java.io.Reader; + import java.util.Arrays; + + import opennlp.tools.util.Span; + + import org.apache.lucene.analysis.Tokenizer; + import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp; + import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp; + import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; + import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + + /** + * Run OpenNLP SentenceDetector and Tokenizer. + * Must have Sentence and/or Tokenizer. 
+ */ + public final class OpenNLPTokenizer extends Tokenizer { + private static final int DEFAULT_BUFFER_SIZE = 256; + + private int finalOffset; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + + // + private Span[] sentences = null; + private Span[][] words = null; + private Span[] wordSet = null; + boolean first = true; + int indexSentence = 0; + int indexWord = 0; + private char[] fullText; + + private NLPSentenceDetectorOp sentenceOp = null; + private NLPTokenizerOp tokenizerOp = null; + + public OpenNLPTokenizer(Reader input, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp, boolean splitPunctuation) throws IOException { + super(input); + termAtt.resizeBuffer(DEFAULT_BUFFER_SIZE); + if (sentenceOp == null && tokenizerOp == null) { + throw new IllegalArgumentException("OpenNLPTokenizer: need one or both of Sentence Detector and Tokenizer"); + } + this.sentenceOp = sentenceOp; + this.tokenizerOp = tokenizerOp; + } + + // OpenNLP ops run all-at-once. Have to cache sentence and/or word spans and feed them out. + // Cache entire input buffer- don't know if this is the right implementation. + // Of if the CharTermAttribute can cache it across multiple increments? 
+ + @Override + public final boolean incrementToken() throws IOException { + if (first) { + loadAll(); + restartAtBeginning(); + first = false; + } + if (sentences.length == 0) { + return false; + } + int sentenceOffset = sentences[indexSentence].getStart(); + if (wordSet == null) { + wordSet = words[indexSentence]; + } + clearAttributes(); + while (indexSentence < sentences.length) { + while (indexWord == wordSet.length) { + indexSentence++; + if (indexSentence < sentences.length) { + wordSet = words[indexSentence]; + indexWord = 0; + sentenceOffset = sentences[indexSentence].getStart(); + } else { + return false; + } + } + // set termAtt from private buffer + Span sentence = sentences[indexSentence]; + Span word = wordSet[indexWord]; + int spot = sentence.getStart() + word.getStart(); + termAtt.setEmpty(); + int termLength = word.getEnd() - word.getStart(); + if (termAtt.buffer().length < termLength) { + termAtt.resizeBuffer(termLength); + } + termAtt.setLength(termLength); + char[] buffer = termAtt.buffer(); + finalOffset = correctOffset(sentenceOffset + word.getEnd()); + offsetAtt.setOffset(correctOffset(word.getStart() + sentenceOffset), finalOffset); + for(int i = 0; i < termLength; i++) { + buffer[i] = fullText[spot + i]; + } + + indexWord++; + return true; + } + + return false; + } + + void restartAtBeginning() throws IOException { + indexWord = 0; + indexSentence = 0; + indexWord = 0; + finalOffset = 0; + wordSet = null; + } + + void loadAll() throws IOException { + fillBuffer(); + detectSentences(); + words = new Span[sentences.length][]; + for(int i = 0; i < sentences.length; i++) { + splitWords(i); + } + } + + void splitWords(int i) { + Span current = sentences[i]; + String sentence = String.copyValueOf(fullText, current.getStart(), current.getEnd() - current.getStart()); + words[i] = tokenizerOp.getTerms(sentence); + } + + // read all text, turn into sentences + void detectSentences() throws IOException { + fullText.hashCode(); + sentences = 
sentenceOp.splitSentences(new String(fullText)); + } + + void fillBuffer() throws IOException { + int offset = 0; + int size = 10000; + fullText = new char[size]; + int length = input.read(fullText); + while(length == size) { + // fullText = IOUtils.toCharArray(input); + fullText = Arrays.copyOf(fullText, offset + size); + offset += size; + length = input.read(fullText, offset, size); + } + fullText = Arrays.copyOf(fullText, offset + length); + } + + @Override + public final void end() { + // set final offset + offsetAtt.setOffset(finalOffset, finalOffset); + } + + // public void reset(Reader input) throws IOException { + // super.reset(input); + // fullText = null; + // sentences = null; + // words = null; + // first = true; + // } + + @Override + public void reset() throws IOException { + super.reset(); + clearAttributes(); + restartAtBeginning(); + } + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/StripPayloadsFilter.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/StripPayloadsFilter.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/StripPayloadsFilter.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/StripPayloadsFilter.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,53 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.lucene.analysis.opennlp; + + import java.io.IOException; + + import org.apache.lucene.analysis.TokenFilter; + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; + + + /** + * Remove all payloads + * + * TODO: subclass from FilteringTokenFilter - it handles positionIncrements + */ + + public final class StripPayloadsFilter extends TokenFilter { + + public StripPayloadsFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + PayloadAttribute payloadAtt = getAttribute(PayloadAttribute.class); + if (payloadAtt != null) { + payloadAtt.setPayload(null); + } + return true; + } + + + return false; + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,43 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.lucene.analysis.opennlp.tools; + + import java.io.IOException; + import opennlp.tools.chunker.ChunkerME; + import opennlp.tools.chunker.ChunkerModel; + import opennlp.tools.util.InvalidFormatException; + + /** + * Supply OpenNLP Chunking tool + * Requires binary models from OpenNLP project on SourceForge. 
+ */ + + public class NLPChunkerOp { + private ChunkerME chunker = null; + + public NLPChunkerOp(ChunkerModel chunkerModel) throws InvalidFormatException, IOException { + chunker = new ChunkerME(chunkerModel); + } + + public synchronized String[] getChunks(String[] words, String[] tags, double[] probs) { + String[] chunks = chunker.chunk(words, tags); + if (probs != null) + chunker.probs(probs); + return chunks; + } + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,43 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package org.apache.lucene.analysis.opennlp.tools; + + import opennlp.tools.namefind.NameFinderME; + import opennlp.tools.namefind.TokenNameFinder; + import opennlp.tools.namefind.TokenNameFinderModel; + import opennlp.tools.util.Span; + + /** + * Supply OpenNLP Named Entity Resolution tool + * Requires binary models from OpenNLP project on SourceForge. + */ + + public class NLPNERTaggerOp { + private final TokenNameFinder nameFinder; + + public NLPNERTaggerOp(TokenNameFinderModel model) { + this.nameFinder = new NameFinderME(model); + } + + public synchronized Span[] getNames(String[] words) { + Span[] names = nameFinder.find(words); + nameFinder.clearAdaptiveData(); + return names; + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,42 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.lucene.analysis.opennlp.tools; + import java.io.IOException; + + import opennlp.tools.postag.POSModel; + import opennlp.tools.postag.POSTagger; + import opennlp.tools.postag.POSTaggerME; + import opennlp.tools.util.InvalidFormatException; + + /** + * Supply OpenNLP Parts-Of-Speech Tagging tool + * Requires binary models from OpenNLP project on SourceForge. + */ + + public class NLPPOSTaggerOp { + private POSTagger tagger = null; + + public NLPPOSTaggerOp(POSModel model) throws InvalidFormatException, IOException { + tagger = new POSTaggerME(model); + } + + public synchronized String[] getPOSTags(String[] words) { + String[] tags = tagger.tag(words); + return tags; + } + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,53 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.lucene.analysis.opennlp.tools; + import java.io.IOException; + + import opennlp.tools.sentdetect.SentenceDetectorME; + import opennlp.tools.sentdetect.SentenceModel; + import opennlp.tools.util.InvalidFormatException; + import opennlp.tools.util.Span; + + /** + * Supply OpenNLP Sentence Detector tool + * Requires binary models from OpenNLP project on SourceForge. + */ + + public class NLPSentenceDetectorOp { + private final SentenceDetectorME sentenceSplitter; + + public NLPSentenceDetectorOp(SentenceModel model) throws InvalidFormatException, IOException { + sentenceSplitter = new SentenceDetectorME(model); + } + + public NLPSentenceDetectorOp() { + sentenceSplitter = null; + } + + public synchronized Span[] splitSentences(String line) { + if (sentenceSplitter != null) { + return sentenceSplitter.sentPosDetect(line); + } else { + Span[] shorty = new Span[1]; + shorty[0] = new Span(0, line.length()); + return shorty; + } + + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java *** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,55 ---- + /** + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.lucene.analysis.opennlp.tools; + + import opennlp.tools.tokenize.Tokenizer; + import opennlp.tools.tokenize.TokenizerME; + import opennlp.tools.tokenize.TokenizerModel; + import opennlp.tools.util.Span; + + /** + * Supply OpenNLP Sentence Tokenizer tool + * Requires binary models from OpenNLP project on SourceForge. 
+  */
+
+ public class NLPTokenizerOp {
+   // null when the instance was created without a model
+   private final Tokenizer tokenizer;
+
+   /**
+    * Creates a tokenizer backed by the given model.
+    *
+    * @param model tokenizer model handed to {@link TokenizerME}
+    */
+   public NLPTokenizerOp(TokenizerModel model) {
+     tokenizer = new TokenizerME(model);
+   }
+
+   /**
+    * Creates a model-less instance: both {@link #getTerms(String)} and
+    * {@link #getTermsStrings(String)} then treat the whole sentence as one term.
+    */
+   public NLPTokenizerOp() {
+     tokenizer = null;
+   }
+
+   /**
+    * Returns the character spans of the terms in {@code sentence}.
+    *
+    * @param sentence text to tokenize
+    * @return the spans reported by the tokenizer, or one span
+    *         {@code [0, sentence.length())} when no model was supplied
+    */
+   public synchronized Span[] getTerms(String sentence) {
+     if (tokenizer == null) {
+       return new Span[] { new Span(0, sentence.length()) };
+     }
+     return tokenizer.tokenizePos(sentence);
+   }
+
+   /**
+    * Returns the terms of {@code sentence} as strings.
+    *
+    * @param sentence text to tokenize
+    * @return the strings reported by the tokenizer, or a one-element array
+    *         holding {@code sentence} itself when no model was supplied
+    */
+   public synchronized String[] getTermsStrings(String sentence) {
+     if (tokenizer == null) {
+       // Mirror getTerms(): without a model the sentence is a single term.
+       // Previously this path dereferenced the null tokenizer.
+       return new String[] { sentence };
+     }
+     return tokenizer.tokenize(sentence);
+   }
+ }
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
*** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,139 ----
+ /**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements. See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License. You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ package org.apache.lucene.analysis.opennlp.tools;
+ import java.io.IOException;
+ import java.io.InputStream;
+ import java.util.Map;
+ import java.util.concurrent.ConcurrentHashMap;
+
+
+ import opennlp.tools.chunker.ChunkerModel;
+ import opennlp.tools.namefind.TokenNameFinder;
+ import opennlp.tools.namefind.TokenNameFinderModel;
+ import opennlp.tools.postag.POSModel;
+ import opennlp.tools.sentdetect.SentenceModel;
+ import opennlp.tools.tokenize.TokenizerModel;
+ import opennlp.tools.util.InvalidFormatException;
+
+ /**
+  * Factory for the OpenNLP operation wrappers: sentence detector, tokenizer,
+  * POS tagger, chunker and NER tagger.
+  * Cache model file objects. Assumes model files are thread-safe.
+  * <p>
+  * The five model getters load a model from the supplied stream the first
+  * time a name is seen and cache it under that name. The five op getters look
+  * the model up by name and wrap it in a new op object on every call; they do
+  * not load anything, so the model must have been cached beforehand (an
+  * unknown name results in the op being constructed with a null model).
+  */
+
+ public class OpenNLPOpsFactory {
+   // Neither read nor written in this class; kept because it is package-visible.
+   static TokenNameFinder nameFinder = null;
+   private static final ConcurrentHashMap<String,SentenceModel> sentenceModels = new ConcurrentHashMap<String,SentenceModel>();
+   private static final ConcurrentHashMap<String,TokenizerModel> tokenizerModels = new ConcurrentHashMap<String,TokenizerModel>();
+   private static final ConcurrentHashMap<String,POSModel> posTaggerModels = new ConcurrentHashMap<String,POSModel>();
+   private static final ConcurrentHashMap<String,ChunkerModel> chunkerModels = new ConcurrentHashMap<String,ChunkerModel>();
+   private static final ConcurrentHashMap<String,TokenNameFinderModel> nerModels = new ConcurrentHashMap<String,TokenNameFinderModel>();
+
+   /**
+    * Returns a new sentence detector for the cached model {@code modelName},
+    * or a model-less detector when {@code modelName} is null.
+    */
+   public static NLPSentenceDetectorOp getSentenceDetector(String modelName) throws InvalidFormatException, IOException {
+     if (modelName != null) {
+       SentenceModel model = sentenceModels.get(modelName);
+       return new NLPSentenceDetectorOp(model);
+     } else {
+       return new NLPSentenceDetectorOp();
+     }
+   }
+
+   /**
+    * Returns the sentence model cached under {@code modelName}, reading it
+    * from {@code modelStream} if it is not cached yet. The stream is not
+    * closed here.
+    */
+   public static SentenceModel getSentenceModel(String modelName, InputStream modelStream) throws IOException,
+       InvalidFormatException {
+     SentenceModel model = sentenceModels.get(modelName);
+     if (model == null) {
+       model = new SentenceModel(modelStream);
+       // If another thread cached the same name first, hand out its instance.
+       SentenceModel earlier = sentenceModels.putIfAbsent(modelName, model);
+       if (earlier != null) {
+         model = earlier;
+       }
+     }
+     return model;
+   }
+
+   /**
+    * Returns a new tokenizer for the cached model {@code modelName},
+    * or a model-less tokenizer when {@code modelName} is null.
+    */
+   public static NLPTokenizerOp getTokenizer(String modelName) throws InvalidFormatException, IOException {
+     if (modelName == null) {
+       return new NLPTokenizerOp();
+     } else {
+       TokenizerModel model = tokenizerModels.get(modelName);
+       return new NLPTokenizerOp(model);
+     }
+   }
+
+   /**
+    * Returns the tokenizer model cached under {@code modelName}, reading it
+    * from {@code modelStream} if it is not cached yet. The stream is not
+    * closed here.
+    */
+   public static TokenizerModel getTokenizerModel(String modelName, InputStream modelStream) throws IOException,
+       InvalidFormatException {
+     TokenizerModel model = tokenizerModels.get(modelName);
+     if (model == null) {
+       model = new TokenizerModel(modelStream);
+       TokenizerModel earlier = tokenizerModels.putIfAbsent(modelName, model);
+       if (earlier != null) {
+         model = earlier;
+       }
+     }
+     return model;
+   }
+
+   /** Returns a new POS tagger wrapping the model cached under {@code modelName}. */
+   public static NLPPOSTaggerOp getPOSTagger(String modelName) throws InvalidFormatException, IOException {
+     POSModel model = posTaggerModels.get(modelName);
+     return new NLPPOSTaggerOp(model);
+   }
+
+   /**
+    * Returns the POS model cached under {@code modelName}, reading it from
+    * {@code modelStream} if it is not cached yet. The stream is not closed here.
+    */
+   public static POSModel getPOSTaggerModel(String modelName, InputStream modelStream) throws IOException,
+       InvalidFormatException {
+     POSModel model = posTaggerModels.get(modelName);
+     if (model == null) {
+       model = new POSModel(modelStream);
+       POSModel earlier = posTaggerModels.putIfAbsent(modelName, model);
+       if (earlier != null) {
+         model = earlier;
+       }
+     }
+     return model;
+   }
+
+   /** Returns a new chunker wrapping the model cached under {@code modelName}. */
+   public static NLPChunkerOp getChunker(String modelName) throws InvalidFormatException, IOException {
+     ChunkerModel model = chunkerModels.get(modelName);
+     return new NLPChunkerOp(model);
+   }
+
+   /**
+    * Returns the chunker model cached under {@code modelName}, reading it from
+    * {@code modelStream} if it is not cached yet. The stream is not closed here.
+    */
+   public static ChunkerModel getChunkerModel(String modelName, InputStream modelStream) throws IOException,
+       InvalidFormatException {
+     ChunkerModel model = chunkerModels.get(modelName);
+     if (model == null) {
+       model = new ChunkerModel(modelStream);
+       ChunkerModel earlier = chunkerModels.putIfAbsent(modelName, model);
+       if (earlier != null) {
+         model = earlier;
+       }
+     }
+     return model;
+   }
+
+   /** Returns a new NER tagger wrapping the model cached under {@code modelName}. */
+   public static NLPNERTaggerOp getNERTagger(String modelName) throws InvalidFormatException, IOException {
+     TokenNameFinderModel model = nerModels.get(modelName);
+     return new NLPNERTaggerOp(model);
+   }
+
+   /**
+    * Returns the name-finder model cached under {@code modelName}, reading it
+    * from {@code modelStream} if it is not cached yet. The stream is not
+    * closed here.
+    */
+   public static TokenNameFinderModel getNERTaggerModel(String modelName, InputStream modelStream) throws IOException,
+       InvalidFormatException {
+     TokenNameFinderModel model = nerModels.get(modelName);
+     if (model == null) {
+       model = new TokenNameFinderModel(modelStream);
+       TokenNameFinderModel earlier = nerModels.putIfAbsent(modelName, model);
+       if (earlier != null) {
+         model = earlier;
+       }
+     }
+     return model;
+   }
+
+   // keeps unit test from blowing out memory
+   /** Empties all five model caches. */
+   public static void clearModels() {
+     sentenceModels.clear();
+     tokenizerModels.clear();
+     posTaggerModels.clear();
+     chunkerModels.clear();
+     nerModels.clear();
+   }
+
+ }
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/StreamMockSolrResourceLoader.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/StreamMockSolrResourceLoader.java
*** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/StreamMockSolrResourceLoader.java Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/StreamMockSolrResourceLoader.java Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,55 ----
+ /**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements. See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License. You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ package org.apache.lucene.analysis.opennlp;
+
+ import java.io.FileInputStream;
+ import java.io.FileNotFoundException;
+ import java.io.IOException;
+ import java.io.InputStream;
+ import java.util.List;
+
+ import org.apache.lucene.analysis.util.ResourceLoader;
+
+ /**
+  * Test-only {@link ResourceLoader} that opens resources straight from the
+  * file system and never instantiates classes.
+  */
+ class StreamMockSolrResourceLoader implements ResourceLoader {
+
+   /**
+    * Opens {@code resource} as a file. A name starting with '/' is used as
+    * given; any other name is resolved by {@link #getFullPath(String)}.
+    *
+    * @throws IllegalArgumentException if the file cannot be opened
+    */
+   @Override
+   public InputStream openResource(String resource) {
+     try {
+       // startsWith() instead of charAt(0): an empty name no longer throws
+       // StringIndexOutOfBoundsException, it is resolved like a relative one.
+       if (!resource.startsWith("/")) {
+         resource = getFullPath(resource);
+       }
+       return new FileInputStream(resource);
+     } catch (FileNotFoundException e) {
+       throw new IllegalArgumentException(e);
+     }
+   }
+
+   // for some reason, solrCore object from test is not visible!
+   // This always runs under a no-name core, so solr.solr.home is main directory
+   /**
+    * Prefixes {@code path} with the "solr.solr.home" system property and "/conf/".
+    * If the property is unset the result starts with the literal text "null".
+    */
+   private String getFullPath(String path) {
+     String home = System.getProperty("solr.solr.home");
+     return home + "/conf/" + path;
+   }
+
+   /** Not supported by this mock: always returns null. */
+   @Override
+   public <T> T newInstance(String cname, Class<T> expectedType) {
+     return null;
+   }
+
+ }
+
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestFilterPayloadsFilter.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestFilterPayloadsFilter.java
*** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestFilterPayloadsFilter.java Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestFilterPayloadsFilter.java Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,91 ----
+ /**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements. See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License.
You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ package org.apache.lucene.analysis.opennlp;
+
+ import org.apache.lucene.analysis.MockTokenizer;
+ import org.apache.lucene.analysis.TokenStream;
+ import org.apache.lucene.analysis.opennlp.FilterPayloadsFilter;
+ import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
+ import org.apache.lucene.analysis.payloads.IdentityEncoder;
+ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+ import org.apache.lucene.util.BytesRef;
+ import org.apache.lucene.util.LuceneTestCase;
+
+ import java.io.StringReader;
+
+ /**
+  * Tests {@link FilterPayloadsFilter} on whitespace-separated input whose
+  * tokens carry an optional "|TAG" payload.
+  */
+ public class TestFilterPayloadsFilter extends LuceneTestCase {
+
+   /**
+    * With the payload list {VB, NN} and the last constructor argument true,
+    * only the tokens tagged NN or VB are expected to come through.
+    */
+   public void testKeepPayloads() throws Exception {
+     String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+     DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
+         (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)),
+         DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
+     byte[][] payloads = {"VB".getBytes("UTF-8"), "NN".getBytes("UTF-8")};
+     FilterPayloadsFilter filter = new FilterPayloadsFilter(false, baseFilter, payloads, true);
+     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+     PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
+     assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+     assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
+     assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
+     assertFalse(filter.incrementToken());
+   }
+
+   /**
+    * With the payload list {VB, NN} and the last constructor argument false,
+    * the tokens tagged NN or VB are expected to be dropped and all others
+    * (including untagged ones) to come through.
+    */
+   public void testFilterPayloads() throws Exception {
+     String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+     DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
+         (new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false)),
+         DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
+     byte[][] payloads = {"VB".getBytes("UTF-8"), "NN".getBytes("UTF-8")};
+     FilterPayloadsFilter filter = new FilterPayloadsFilter(false, baseFilter, payloads, false);
+     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+     PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
+     assertTermEquals("The", filter, termAtt, payAtt, null);
+     assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+     assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+     assertTermEquals("over", filter, termAtt, payAtt, null);
+     assertTermEquals("the", filter, termAtt, payAtt, null);
+     assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+     assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
+     assertFalse(filter.incrementToken());
+   }
+
+   /** Asserts that the next token is {@code expected} and carries no payload. */
+   void assertTermPayload(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt) throws Exception {
+     stream.reset();
+     assertTrue(stream.incrementToken());
+     assertEquals(expected, termAtt.toString());
+     BytesRef payload = payAtt.getPayload();
+     assertEquals(null, payload);
+   }
+
+   /**
+    * Asserts that the next token is {@code expected} and that its payload
+    * bytes equal {@code expectPay} (null meaning "no payload").
+    */
+   void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+     // NOTE(review): reset() is called before every token, as in the original
+     // helper - confirm the filter chain is meant to tolerate repeated reset().
+     stream.reset();
+     assertTrue(stream.incrementToken());
+     assertEquals(expected, termAtt.toString());
+     BytesRef payload = payAtt.getPayload();
+     if (payload == null) {
+       assertTrue("expectPay is not null and it should be", expectPay == null);
+       return;
+     }
+     // Fail with a message rather than a NullPointerException on
+     // expectPay.length when a payload shows up where none was expected.
+     assertNotNull("unexpected payload on term: " + expected, expectPay);
+     assertTrue(payload.length + " does not equal: " + expectPay.length, payload.length == expectPay.length);
+     for (int i = 0; i < expectPay.length; i++) {
+       assertTrue(expectPay[i] + " does not equal: " + payload.bytes[payload.offset + i], expectPay[i] == payload.bytes[payload.offset + i]);
+     }
+   }
+
+ }
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestStripPayloadsFilter.java lucene_solr_4_1-org/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestStripPayloadsFilter.java
*** lucene-solr_4_1.bak/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestStripPayloadsFilter.java Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestStripPayloadsFilter.java Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,62 ----
+ /**
+  * Licensed to the Apache Software Foundation (ASF) under one or more
+  * contributor license agreements. See the NOTICE file distributed with
+  * this work for additional information regarding copyright ownership.
+  * The ASF licenses this file to You under the Apache License, Version 2.0
+  * (the "License"); you may not use this file except in compliance with
+  * the License. You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ package org.apache.lucene.analysis.opennlp;
+
+ import org.apache.lucene.analysis.MockTokenizer;
+ import org.apache.lucene.analysis.TokenStream;
+ import org.apache.lucene.analysis.opennlp.StripPayloadsFilter;
+ import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
+ import org.apache.lucene.analysis.payloads.IdentityEncoder;
+ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+ import org.apache.lucene.util.BytesRef;
+ import org.apache.lucene.util.LuceneTestCase;
+
+ import java.io.StringReader;
+
+ /**
+  * Tests that {@link StripPayloadsFilter} passes every term through and
+  * leaves none of them with a payload.
+  */
+ public class TestStripPayloadsFilter extends LuceneTestCase {
+
+   /** Every input term is expected in order, each with a null payload. */
+   public void testRemovePayloads() throws Exception {
+     String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
+     MockTokenizer tokenizer = new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false);
+     DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(
+         tokenizer, DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
+     StripPayloadsFilter filter = new StripPayloadsFilter(baseFilter);
+     CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+     PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
+     String[] expectedTerms = {
+         "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs"
+     };
+     for (String term : expectedTerms) {
+       assertTermPayload(term, filter, termAtt, payAtt);
+     }
+     assertFalse(filter.incrementToken());
+   }
+
+   /** Asserts that the next token is {@code expected} and carries no payload. */
+   void assertTermPayload(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt) throws Exception {
+     stream.reset();
+     assertTrue(stream.incrementToken());
+     assertEquals(expected, termAtt.toString());
+     BytesRef stripped = payAtt.getPayload();
+     assertEquals(null, stripped);
+   }
+ }
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/build.xml lucene_solr_4_1-org/lucene/build.xml
*** lucene-solr_4_1.bak/lucene/build.xml Wed Feb 27 22:05:55 2013
--- lucene_solr_4_1-org/lucene/build.xml Wed Feb 27 11:36:04 2013
***************
*** 251,256 ****
--- 251,257 ----
+
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/ivy-settings.xml lucene_solr_4_1-org/lucene/ivy-settings.xml
*** lucene-solr_4_1.bak/lucene/ivy-settings.xml Wed Feb 27 22:05:55 2013
--- lucene_solr_4_1-org/lucene/ivy-settings.xml Wed Feb 27 11:29:43 2013
***************
*** 47,54 ****
--- 47,56 ----
+
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt lucene_solr_4_1-org/lucene/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt
*** lucene-solr_4_1.bak/lucene/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/lucene/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,6 ----
+ Apache Commons IO
+ Copyright 2001-2008 The Apache Software Foundation
+
+ This product includes software developed by
+ The Apache Software Foundation (http://www.apache.org/).
+
diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/jwnl-1.4_rc3-NOTICE.txt lucene_solr_4_1-org/lucene/licenses/jwnl-1.4_rc3-NOTICE.txt
*** lucene-solr_4_1.bak/lucene/licenses/jwnl-1.4_rc3-NOTICE.txt Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/lucene/licenses/jwnl-1.4_rc3-NOTICE.txt Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,6 ----
+ Apache Commons IO
+ Copyright 2001-2008 The Apache Software Foundation
+
+ This product includes software developed by
+ The Apache Software Foundation (http://www.apache.org/).
+ diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/jwnl-1.4_rc3.jar.sha1 lucene_solr_4_1-org/lucene/licenses/jwnl-1.4_rc3.jar.sha1 *** lucene-solr_4_1.bak/lucene/licenses/jwnl-1.4_rc3.jar.sha1 Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/jwnl-1.4_rc3.jar.sha1 Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + e37ef9062f22bb4ce83d9bc38bb87df7bf671553 diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt lucene_solr_4_1-org/lucene/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt *** lucene-solr_4_1.bak/lucene/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt lucene_solr_4_1-org/lucene/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt *** lucene-solr_4_1.bak/lucene/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). 
+ diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 lucene_solr_4_1-org/lucene/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 *** lucene-solr_4_1.bak/lucene/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + b6c5e43e399b076d2c3ce013898c9d6229a55066 diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt lucene_solr_4_1-org/lucene/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt *** lucene-solr_4_1.bak/lucene/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,202 ---- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt lucene_solr_4_1-org/lucene/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt *** lucene-solr_4_1.bak/lucene/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). 
+ diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 lucene_solr_4_1-org/lucene/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 *** lucene-solr_4_1.bak/lucene/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/lucene/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + 0357b094d59517e1a389369de900735da8c75bfc diff -crBN -X exclude.pats lucene-solr_4_1.bak/lucene/module-build.xml lucene_solr_4_1-org/lucene/module-build.xml *** lucene-solr_4_1.bak/lucene/module-build.xml Wed Feb 27 22:05:55 2013 --- lucene_solr_4_1-org/lucene/module-build.xml Wed Feb 27 11:29:43 2013 *************** *** 221,226 **** --- 221,247 ---- + + + + + + + + + + + + + + + + + + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/build.xml lucene_solr_4_1-org/solr/build.xml *** lucene-solr_4_1.bak/solr/build.xml Wed Feb 27 22:05:29 2013 --- lucene_solr_4_1-org/solr/build.xml Wed Feb 27 11:29:43 2013 *************** *** 50,58 **** - - - See ${example}/README.txt for how to run the Solr example configuration. --- 50,55 ---- *************** *** 297,302 **** --- 294,300 ---- + - --- 446,451 ---- diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/common-build.xml lucene_solr_4_1-org/solr/common-build.xml *** lucene-solr_4_1.bak/solr/common-build.xml Wed Feb 27 22:05:29 2013 --- lucene_solr_4_1-org/solr/common-build.xml Wed Feb 27 11:29:43 2013 *************** *** 77,82 **** --- 77,83 ---- *************** *** 194,200 **** ! --- 195,201 ---- ! 
*************** *** 256,261 **** --- 257,263 ---- + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/build.xml lucene_solr_4_1-org/solr/contrib/opennlp/build.xml *** lucene-solr_4_1.bak/solr/contrib/opennlp/build.xml Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/build.xml Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,56 ---- + + + + + + + + Solr Integration with OpenNLP natural-language-processing library + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/ivy.xml lucene_solr_4_1-org/solr/contrib/opennlp/ivy.xml *** lucene-solr_4_1.bak/solr/contrib/opennlp/ivy.xml Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/ivy.xml Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,35 ---- + + + + + + + + + + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/FilterPayloadsFilterFactory.java lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/FilterPayloadsFilterFactory.java *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/FilterPayloadsFilterFactory.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/FilterPayloadsFilterFactory.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,76 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.solr.analysis; + + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.opennlp.FilterPayloadsFilter; + import org.apache.lucene.analysis.util.ResourceLoader; + import org.apache.lucene.analysis.util.ResourceLoaderAware; + import org.apache.lucene.analysis.util.TokenFilterFactory; + + import java.io.UnsupportedEncodingException; + import java.util.Map; + + /** + * Factory for {@link FilterPayloadsFilter}. + *
+  * <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+  *   <analyzer>
+  *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+  *     <filter class="solr.FilterPayloadsFilterFactory" payloadList="payload1,payload2" keepPayloads="false"/>
+  *   </analyzer>
+  * </fieldType>
+ * @version $Id$ + */ + public class FilterPayloadsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + public String PAYLOAD_LIST = "payloadList"; + public String KEEP_PAYLOADS = "keepPayloads"; + byte[][] payloads = null; + boolean keepTokens = false; + + @Override + public void init(Map args) { + super.init(args); + assureMatchVersion(); + payloads = getPayloads(args); + keepTokens = getBoolean(KEEP_PAYLOADS, false); + } + + private byte[][] getPayloads(Map args) { + String payloadList = args.get(PAYLOAD_LIST); + String[] words = payloadList.split(","); + byte[][] list; + list = new byte[words.length][]; + for(int i = 0; i < words.length; i++) { + try { + list[i] = words[i].getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + return list; + } + + public void inform(ResourceLoader loader) { + } + + public FilterPayloadsFilter create(TokenStream input) { + FilterPayloadsFilter filterPayloadsFilter = new FilterPayloadsFilter(false, input, payloads, keepTokens); + return filterPayloadsFilter; + } + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPFilterFactory.java lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPFilterFactory.java *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPFilterFactory.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPFilterFactory.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,130 ---- + package org.apache.solr.analysis; + + + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + + import java.io.IOException; + import java.util.ArrayList; + import java.util.Map; + + import opennlp.tools.util.InvalidFormatException; + + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.opennlp.OpenNLPFilter; + import org.apache.lucene.analysis.opennlp.tools.NLPChunkerOp; + import org.apache.lucene.analysis.opennlp.tools.NLPNERTaggerOp; + import org.apache.lucene.analysis.opennlp.tools.NLPPOSTaggerOp; + import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory; + import org.apache.lucene.analysis.util.ResourceLoader; + import org.apache.lucene.analysis.util.ResourceLoaderAware; + import org.apache.lucene.analysis.util.TokenFilterFactory; + + /** + * Factory for {@link OpenNLPFilter}. + *
+  * <fieldType name="text_onlp" class="solr.TextField" positionIncrementGap="100">
+  *   <analyzer>
+  *     <tokenizer class="solr.OpenNLPTokenizerFactory" tokenizerModel="filename"/>
+  *     <filter class="solr.OpenNLPFilterFactory"
+  *       posTaggerModel="filename"
+  *       chunkerModel="filename"
+  *       nerTaggerModels="filename,filename,...,filename"
+  *     />
+  *   </analyzer>
+  * </fieldType>
+ *

+ * All-in-one OpenNLP Tokenizer/Tagger. Supports optional Parts-of-Speech, Chunking and Named Entity Recognition. + *
+ *
tokenizerModel is not read by this filter; set it on solr.OpenNLPTokenizerFactory. + *
posTaggerModel/chunkerModel/nerTaggerModels are optional. + *
0 or more NERTaggerModels are accepted. They are run in sequence. + */ + public class OpenNLPFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + + private String posTaggerModelFile = null; + private String chunkerModelFile = null; + private String[] nerTaggerModelFiles = null; + + @Override + public void init(Map args) { + super.init(args); + if (args.containsKey("posTaggerModel")) { + posTaggerModelFile = args.get("posTaggerModel"); + } + if (args.containsKey("chunkerModel")) { + chunkerModelFile = args.get("chunkerModel"); + } + if (args.containsKey("nerTaggerModels")) { + nerTaggerModelFiles = args.get("nerTaggerModels").split(","); + } + } + + @Override + public OpenNLPFilter create(TokenStream in) { + try { + NLPPOSTaggerOp posTaggerOp = null; + NLPChunkerOp chunkerOp = null; + ArrayList nerTaggerOps = null; + + if (posTaggerModelFile != null) { + posTaggerOp = OpenNLPOpsFactory.getPOSTagger(posTaggerModelFile); + } + if (chunkerModelFile != null) { + chunkerOp = OpenNLPOpsFactory.getChunker(chunkerModelFile); + } + if (nerTaggerModelFiles != null) { + nerTaggerOps = new ArrayList(); + for (String file: nerTaggerModelFiles) { + NLPNERTaggerOp op = OpenNLPOpsFactory.getNERTagger(file); + nerTaggerOps.add(op); + } + } + return new OpenNLPFilter(in, posTaggerOp, chunkerOp, nerTaggerOps); + } catch (InvalidFormatException e) { + throw new IllegalArgumentException(e); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + @Override + public void inform(ResourceLoader loader) { + try { + // load and register read-only models in cache with file/resource names + if (posTaggerModelFile != null) { + OpenNLPOpsFactory.getPOSTaggerModel(posTaggerModelFile, loader.openResource(posTaggerModelFile)); + } + if (chunkerModelFile != null) { + OpenNLPOpsFactory.getChunkerModel(chunkerModelFile, loader.openResource(chunkerModelFile)); + } + if (nerTaggerModelFiles != null) { + new ArrayList(); + for (String file: 
nerTaggerModelFiles) { + OpenNLPOpsFactory.getNERTaggerModel(file, loader.openResource(file)); + } + } + } catch (InvalidFormatException e) { + throw new IllegalArgumentException(e); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPTokenizerFactory.java lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPTokenizerFactory.java *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPTokenizerFactory.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/OpenNLPTokenizerFactory.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,102 ---- + + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + + package org.apache.solr.analysis; + + import java.io.IOException; + import java.io.Reader; + import java.util.Map; + + import opennlp.tools.util.InvalidFormatException; + + import org.apache.lucene.analysis.opennlp.OpenNLPTokenizer; + import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp; + import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp; + import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory; + import org.apache.lucene.analysis.util.ResourceLoader; + import org.apache.lucene.analysis.util.ResourceLoaderAware; + import org.apache.lucene.analysis.util.TokenizerFactory; + + /** + * Factory for {@link OpenNLPTokenizer}. + *

+  * <fieldType name="text_onlp" class="solr.TextField" positionIncrementGap="100">
+  *   <analyzer>
+  *     <tokenizer class="solr.OpenNLPTokenizerFactory"
+  *       sentenceModel="filename"
+  *       tokenizerModel="filename"
+  *       splitPunctuation="true"
+  *     />
+  *   </analyzer>
+  * </fieldType>
+ *

+ * All-in-one OpenNLP Tokenizer/Tagger. + *
+ *
sentenceModel and/or tokenizerModel files are required. + */ + public class OpenNLPTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware { + public static final String SENTENCE_MODEL = "sentenceModel"; + public static final String TOKENIZER_MODEL = "tokenizerModel"; + + private String sentenceModelFile = null; + private String tokenizerModelFile = null; + private boolean splitPunctuation = false; + + @Override + public void init(Map args) { + super.init(args); + sentenceModelFile = args.get("sentenceModel"); + tokenizerModelFile = args.get("tokenizerModel"); + splitPunctuation = getBoolean("splitPunctuation", false); + } + + @Override + public OpenNLPTokenizer create(Reader in) { + try { + NLPSentenceDetectorOp sentenceOp = null; + NLPTokenizerOp tokenizerOp = null; + + sentenceOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile); + tokenizerOp = OpenNLPOpsFactory.getTokenizer(tokenizerModelFile); + return new OpenNLPTokenizer(in, sentenceOp, tokenizerOp, splitPunctuation); + } catch (InvalidFormatException e) { + throw new IllegalArgumentException(e); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + @Override + public void inform(ResourceLoader loader) { + try { + // register models in cache with file/resource names + if (sentenceModelFile != null) { + OpenNLPOpsFactory.getSentenceModel(sentenceModelFile, loader.openResource(sentenceModelFile)); + } + if (tokenizerModelFile != null) { + OpenNLPOpsFactory.getTokenizerModel(tokenizerModelFile, loader.openResource(tokenizerModelFile)); + } + } catch (InvalidFormatException e) { + throw new IllegalArgumentException(e); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/StripPayloadsFilterFactory.java lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/StripPayloadsFilterFactory.java *** 
lucene-solr_4_1.bak/solr/contrib/opennlp/src/java/org/apache/solr/analysis/StripPayloadsFilterFactory.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/java/org/apache/solr/analysis/StripPayloadsFilterFactory.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,56 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + package org.apache.solr.analysis; + + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.opennlp.StripPayloadsFilter; + import org.apache.lucene.analysis.util.ResourceLoader; + import org.apache.lucene.analysis.util.ResourceLoaderAware; + import org.apache.lucene.analysis.util.TokenFilterFactory; + + import java.util.Map; + + /** + * Factory for {@link StripPayloadsFilter}. + *

+  * <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+  *   <analyzer>
+  *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+  *     <filter class="solr.SomethingThatAddsPayloads"/>
+  *     <filter class="solr.StripPayloadsFilterFactory"/>
+  *   </analyzer>
+  * </fieldType>
+ * @version $Id$ + */ + public class StripPayloadsFilterFactory extends TokenFilterFactory implements ResourceLoaderAware { + + @Override + public void init(Map args) { + super.init(args); + assureMatchVersion(); + } + + @Override + public void inform(ResourceLoader loader) { + } + + public StripPayloadsFilter create(TokenStream input) { + StripPayloadsFilter stripPayloadsFilter = new StripPayloadsFilter(input); + return stripPayloadsFilter; + } + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test/org/apache/solr/analysis/StreamMockSolrResourceLoader.java lucene_solr_4_1-org/solr/contrib/opennlp/src/test/org/apache/solr/analysis/StreamMockSolrResourceLoader.java *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test/org/apache/solr/analysis/StreamMockSolrResourceLoader.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test/org/apache/solr/analysis/StreamMockSolrResourceLoader.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,52 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package org.apache.solr.analysis; + + import java.io.FileInputStream; + import java.io.FileNotFoundException; + import java.io.InputStream; + + import org.apache.lucene.analysis.util.ResourceLoader; + + class StreamMockSolrResourceLoader implements ResourceLoader { + + @Override + public InputStream openResource(String resource) { + try { + if (resource.charAt(0) != '/') { + resource = getFullPath(resource); + } + return new FileInputStream(resource); + } catch (FileNotFoundException e) { + throw new IllegalArgumentException(e); + } + } + + // for some reason, solrCore object from test is not visible! + // This always runs under a no-name core, so solr.solr.home is main directory + private String getFullPath(String path) { + String home = System.getProperty("solr.solr.home"); + return home + "/conf/" + path; + } + + public T newInstance(String cname, Class expectedType) { + return null; + } + + } + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPFilterFactory.java lucene_solr_4_1-org/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPFilterFactory.java *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPFilterFactory.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPFilterFactory.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,308 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.solr.analysis; + + import java.io.IOException; + import java.io.StringReader; + import java.util.Arrays; + import java.util.HashMap; + import java.util.Map; + + import org.apache.lucene.analysis.BaseTokenStreamTestCase; + import org.apache.lucene.analysis.TokenStream; + import org.apache.lucene.analysis.Tokenizer; + import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; + import org.apache.lucene.util.BytesRef; + import org.apache.solr.SolrTestCaseJ4; + import org.junit.BeforeClass; + import org.junit.Test; + + /** + * Needs the OpenNLP Tokenizer because it creates full streams of punctuation. + * The POS, Chunking and NER models are based on this tokenization. + * + * Tagging models are created from tiny test data in contrib/opennlp/test-files/training and are not very accurate. + * Chunking in particular is garbage. + * NER training generally recognizes sentences that end with "Flashman." The period is required. + */ + + public class TestOpenNLPFilterFactory extends SolrTestCaseJ4 { + + static String SENTENCES = "Sentence number 1 has 6 words. 
Sentence number 2, 5 words."; + static String[] SENTENCES_punc = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."}; + static String[] SENTENCES_nopunc = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."}; + static int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57}; + static int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58}; + static String[] SENTENCES_posTags = {"NNS", "NN", "CD", "NNS", "CD", "NNS", ".", "VBD", "IN", "CD", ",", "CD", "NNS", "."}; + static String SENTENCES_chunks[] = {"I-NP", "I-NP", "I-NP", "I-NP", "I-NP", "I-NP", "O", "O", "B-PP", "B-NP", "O", "B-NP", "I-NP", "O"}; + + static String NAMES2 = "Royal Flash is a tale about Harry Flashman."; + static String[] NAMES2_OUT= {null, null, null, null, null, null, null, "PERSON", null}; + + static String SPLIT = "An A/B test."; + static String[] SPLIT_terms = {"An", "A", "/", "B", "test", "."}; + static int[] SPLIT_startOffsets = {0, 3, 4, 5, 7, 11}; + static int[] SPLIT_endOffsets = {2, 4, 5, 6, 11, 12}; + + static String NO_BREAK = "No period"; + static String[] NO_BREAK_terms = {"No", "period"}; + static int[] NO_BREAK_startOffsets = {0, 3}; + static int[] NO_BREAK_endOffsets = {2, 9}; + + + final boolean doSentences = false; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema.xml", getFile("opennlp/solr").getAbsolutePath()); + } + + @Test + public void testPassthrough() throws IOException { + OpenNLPTokenizerFactory tokf = getTokenizerFactory(); + + // Map args = new HashMap(); + StringReader inputReader = new StringReader(SENTENCES); + Tokenizer t = tokf.create(inputReader); + OpenNLPFilterFactory ff = new OpenNLPFilterFactory(); + TokenStream ts = ff.create(t); + walkTerms(ts, "none", SENTENCES_punc, null); + ts.reset(); + walkTerms(ts, "none", SENTENCES_punc, null); + 
BaseTokenStreamTestCase.assertTokenStreamContents(ts, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets); + } + + public void testPOS() throws Exception { + OpenNLPTokenizerFactory tokf = getTokenizerFactory(); + OpenNLPFilterFactory ff = new OpenNLPFilterFactory(); + Map args = new HashMap(); + args.put("posTaggerModel", getFullPath("opennlp/en-test-pos-maxent.bin")); + ff.init(args); + ff.inform(new StreamMockSolrResourceLoader()); + + StringReader inputReader = new StringReader(SENTENCES); + Tokenizer t = tokf.create(inputReader); + TokenStream ts = ff.create(t); + walkTerms(ts, "pos", SENTENCES_punc, SENTENCES_posTags); + ts.reset(); + walkTerms(ts, "pos", SENTENCES_punc, SENTENCES_posTags); + } + + public void testChunking() throws Exception { + OpenNLPTokenizerFactory tokf = getTokenizerFactory(); + OpenNLPFilterFactory ff = new OpenNLPFilterFactory(); + Map args = new HashMap(); + args.put("posTaggerModel", getFullPath("opennlp/en-test-pos-maxent.bin")); + args.put("chunkerModel", getFullPath("opennlp/en-test-chunker.bin")); + ff.init(args); + ff.inform(new StreamMockSolrResourceLoader()); + + StringReader inputReader = new StringReader(SENTENCES); + Tokenizer t = tokf.create(inputReader); + TokenStream ts = ff.create(t); + walkTerms(ts, "chunks", SENTENCES_punc, SENTENCES_chunks); + ts.reset(); + walkTerms(ts, "chunks", SENTENCES_punc, SENTENCES_chunks); + } + + public void testNames() throws Exception { + OpenNLPTokenizerFactory tokf = getTokenizerFactory(); + OpenNLPFilterFactory ff = new OpenNLPFilterFactory(); + Map args = new HashMap(); + args.put("nerTaggerModels", getFullPath("opennlp/en-test-ner-person.bin")); + ff.init(args); + ff.inform(new StreamMockSolrResourceLoader()); + + StringReader inputReader = new StringReader(NAMES2); + Tokenizer t = tokf.create(inputReader); + TokenStream ts = ff.create(t); + walkTerms(ts, "names", null, NAMES2_OUT); + } + + public void testNoBreak() throws Exception { + OpenNLPTokenizerFactory tokf = 
getTokenizerFactory(); + OpenNLPFilterFactory ff = new OpenNLPFilterFactory(); + + StringReader inputReader = new StringReader(NO_BREAK); + Tokenizer t = tokf.create(inputReader); + TokenStream ts = ff.create(t); + BaseTokenStreamTestCase.assertTokenStreamContents(ts, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets); + } + + OpenNLPTokenizerFactory getTokenizerFactory() throws IOException { + OpenNLPTokenizerFactory tokf = new OpenNLPTokenizerFactory(); + Map args = new HashMap(); + args.put("tokenizerModel", getFullPath("opennlp/en-test-tokenizer.bin")); + tokf.init(args); + tokf.inform(new StreamMockSolrResourceLoader()); + return tokf; + } + + void walkTerms(TokenStream ts, String op, String[] terms, String[] tags) throws IOException { + int term = 0; + while (ts.incrementToken()) { + CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); + String word = new String(termAtt.buffer()); + word = word.substring(termAtt.length()); + word = termAtt.toString(); + if (terms != null) { + assertEquals("term #" + term, terms[term], word); + } + if (tags != null) { + if (tags[term] == null) { + assertEquals("term #" + term+ ", " + op + " #" + term + ": ", tags[term], null); + } else { + PayloadAttribute p = ts.getAttribute(PayloadAttribute.class); + BytesRef payload = p.getPayload(); + byte[] data = Arrays.copyOfRange(payload.bytes, payload.offset, payload.offset + payload.length); + assertEquals("term #" + term+ ", " + op + " #" + term + ": ", tags[term], (data != null) ? 
new String(data, "UTF-8") : null); + } + } + term++; + } + if (terms != null) { + assertEquals( "# of terms in paragraph", terms.length, term); + } + } + + private String getFullPath(String path) throws IOException { + String home = System.getProperty("solr.solr.home"); + return home + "/collection1/conf/" + path; + } + + String longtext = "Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government.\n" + + "The Iranian news agency IRNA, in a report received in London, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over.\n" + + "It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had \"dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war.\"\n" + + "The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq.\n" + + "\"The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army...,\" said the statement quoted by IRNA.\n" + + "It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to \"come to their senses\" and discontinue support for what it called the tottering regime in Iraq.\n" + + "Iran said its forces had \"liberated\" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands, townships, rivers and part of a road leading into Basra.\n" + + "The Iranian forces \"are in full control of these areas,\" the statement said.\n" + + "It said 81 Iraqi brigades and battalions were totally destroyed, along with 700 tanks and 1,500 other vehicles. The victory list also included 80 warplanes downed, 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers. 
\n" + + "\n" + + "U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25, the Federal Reserve said.\n" + + "The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs, with extended credits up 10 mln dlrs at 304 mln dlrs. The week was the second half of a two-week statement period. Net borrowings in the prior week averaged 451 mln dlrs.\n" + + "Commenting on the two-week statement period ended February 25, the Fed said that banks had average net free reserves of 644 mln dlrs a day, down from 1.34 billion two weeks earlier.\n" + + "A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday.\n" + + "He said that natural float had been \"acting a bit strangely\" for this time of year, noting that there had been poor weather during the latest week.\n" + + "The spokesman said that natural float ranged from under 500 mln dlrs on Friday, for which he could give no reason, to nearly one billion dlrs on both Thursday and Wednesday.\n" + + "The Fed spokeman could give no reason for Thursday's high float, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts.\n" + + "For the week as a whole, he said that float related as of adjustments were \"small,\" adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country.\n" + + "The spokesman said that on both Tuesday and Wednesday, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days.\n" + + "However, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact.\n" + + "During the week ended Wednesday, 45 pct of 
net discount window borrowings were made by the smallest banks, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions.\n" + + "On Wednesday, 55 pct of the borrowing was accounted for by the money center banks, with 30 pct by the large regionals and 15 pct by the smallest banks.\n" + + "The Fed spokesman said the banking system had excess reserves on Thursday, Monday and Tuesday and a deficit on Friday and Wedndsday. That produced a small daily average deficit for the week as a whole.\n" + + "For the two-week period, he said there were relatively high excess reserves on a daily avearge, almost all of which were at the smallest banks. Reuter \n" + + "\n" + + "American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock.\n" + + "American Express stock got a lift from the rumor, as the market calculated a partially public Shearson may command a good market value, thereby boosting the total value of American Express. The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend.\n" + + "American Express closed on the New York Stock Exchange at 72-5/8, up 4-1/8 on heavy volume.\n" + + "American Express would not comment on the rumors or its stock activity.\n" + + "Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes.\n" + + "At the meeting, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson, according to analysts.\n" + + "Yesterday, Shearson said it was elevating its chief operating officer, Jeffery Lane, to the added position of president, which had been vacant. 
It also created four new positions for chairmen of its operating divisions.\n" + + "Analysts speculated a partial spinoff would make most sense, contrary to one variation on market rumors of a total spinoff.\n" + + "Some analysts, however, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express, contributing about 20 pct of earnings last year.\n" + + "\"I think it is highly unlikely that American Express is going to sell shearson,\" said Perrin Long of Lipper Analytical. He questioned what would be a better investment than \"a very profitable securities firm.\"\n" + + "Several analysts said American Express is not in need of cash, which might be the only reason to sell a part of a strong asset.\n" + + "But others believe the company could very well of considered the option of spinning out part of Shearson, and one rumor suggests selling about 20 pct of it in the market.\n" + + "Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past.\n" + + "\"Shearson being as profitable as it is would have fetched a big premium in the market place. Shearson's book value is in the 1.4 mln dlr range. Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization,\" said Eckenfelder.\n" + + "Some analysts said American Express could use capital since it plans to expand globally.\n" + + "\"They have enormous internal growth plans that takes capital. You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road,\" said E.F. Hutton Group analyst Michael Lewis.\n" + + "\"They've outlined the fact that they're investing heavily in the future, which goes heavily into the international arena,\" said Lewis. 
\"...That does not preclude acquisitions and divestitures along the way,\" he said.\n" + + "Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson, its stock might better reflect other assets, such as the travel related services business.\n" + + "\"It could find its true water mark with a lesser exposure to brokerage. The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company,\" he said.\n" + + "Lewis said Shearson contributed 316 mln in after-tax operating earnings, up from about 200 mln dlrs in 1985.\n" + + "Reuter \n" + + "\n" + + "Coleco Industries Inc said it expects to return to profitability in 1987.\n" + + "Earlier, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier.\n" + + "In a prepared statement, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs.\n" + + "Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year.\n" + + "Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability, it said.\n" + + "At the annual Toy Fair earlier this month, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable.\n" + + "Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt, finance minister manuel azpurua told a press conference.\n" + + "He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say, \"they are progressing.\"\n" + + 
"Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments, but each country must negotiate according to its own interest.\n" + + "Asked to comment on chile's agreement with its creditors today, which includes an interest rate margin of one pct over libor, azpurua said only, \"that is good news.\"\n" + + "According to banking sources, the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants.\n" + + "Azpurua said four basic elements are being negotiated with the banks now: spread reduction, deferral of principal payments due in 1987 and 1988, lenghtening the 12-1/2 year repayment schedule, and debt capitalization schemes.\n" + + "Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year. It was due to amortize 1.05 billion dlrs under the rescheduling, and pay 420 mln dlrs in non-restructured principal, both public sector.\n" + + "He said venezuela's original proposal was to pay no principal on restructured debt this year, but is now insisting that if it makes payments they be compensated by new bank loans.\n" + + "The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year, but that no direct commitment was likely on new loans.\n" + + "\"debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed,\" azpurua said.\n" + + "However, he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments, \"because circumstances change too quickly.\"\n" + + "At the same time, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt, based on experience in other countries. The rules would be published by the finance ministry and the central bank. 
\n" + + "\n" + + "Thomson McKinnon Mortgage Assets Corp, a unit of Thomson McKinnon Inc, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS.\n" + + "The floating rate class amounts to 60 mln dlrs. It has an average life of 7.11 years and matures 2018. The CMOs have an initial coupon of 7.0375 pct, which will be reset 60 basis points above LIBOR, said sole manager Thomson McKinnon.\n" + + "The inverse floater totals 4.8 mln dlrs. It has an average life of 13.49 years and matures 2018. These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40.\n" + + "Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct).\n" + + "A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined. \"The yield floats opposite of LIBOR,\" he said.\n" + + "The fixed-rate tranche totals 35.2 mln dlrs. It has an average life of 3.5 years and matures 2016. The CMOs were assigned a 7.65 pct coupon and par pricing.\n" + + "The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp, Freddie Mac, certificates. \n" + + "\n" + + "OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices, oil industry analysts said.\n" + + "\"The movement to higher oil prices was never to be as easy as OPEC thought. 
They may need an emergency meeting to sort out the problems,\" said Daniel Yergin, director of Cambridge Energy Research Associates, CERA.\n" + + "Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets.\n" + + "\"OPEC's problem is not a price problem but a production issue and must be addressed in that way,\" said Paul Mlotok, oil analyst with Salomon Brothers Inc.\n" + + "He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices.\n" + + "But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December.\n" + + "\"OPEC has to learn that in a buyers market you cannot have deemed quotas, fixed prices and set differentials,\" said the regional manager for one of the major oil companies who spoke on condition that he not be named. \"The market is now trying to teach them that lesson again,\" he added.\n" + + "David T. Mizrahi, editor of Mideast reports, expects OPEC to meet before June, although not immediately. 
However, he is not optimistic that OPEC can address its principal problems.\n" + + "\"They will not meet now as they try to take advantage of the winter demand to sell their oil, but in late March and April when demand slackens,\" Mizrahi said.\n" + + "But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd.\"\n" + + "Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output.\n" + + "\"OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then,\" said Dillard Spriggs of Petroleum Analysis Ltd in New York.\n" + + "But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production.\n" + + "\"Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production,\" he told Reuters in a telephone interview. 
\n" + + "\n" + + "BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance, banking analysts said.\n" + + "Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering, which has yet to be approved by the Securities and Exchange Commission.\n" + + "BankAmerica stock fell this week, along with other banking issues, on the news that Brazil has suspended interest payments on a large portion of its foreign debt.\n" + + "The stock traded around 12, down 1/8, this afternoon, after falling to 11-1/2 earlier this week on the news.\n" + + "Banking analysts said that with the immediate threat of the First Interstate Bancorp takeover bid gone, BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term.\n" + + "BankAmerica filed the offer on January 26. It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9.\n" + + "A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated.\n" + + "\"The circumstances at the time will determine what we do,\" said Arthur Miller, BankAmerica's Vice President for Financial Communications, when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval.\n" + + "\"I'd put it off as long as they conceivably could,\" said Lawrence Cohn, analyst with Merrill Lynch, Pierce, Fenner and Smith.\n" + + "Cohn said the longer BankAmerica waits, the longer they have to show the market an improved financial outlook.\n" + + "Although BankAmerica has yet to specify the types of equities it would offer, most analysts believed a convertible preferred stock would encompass at least part of it.\n" + + "Such an offering at a depressed stock price would mean a lower conversion price and more dilution to 
BankAmerica stock holders, noted Daniel Williams, analyst with Sutro Group.\n" + + "Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter, the initial shock reaction is likely to ease over the coming weeks.\n" + + "Nevertheless, BankAmerica, which holds about 2.70 billion dlrs in Brazilian loans, stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt, and as much as 200 mln dlrs if Brazil pays no interest for a year, said Joseph Arsenio, analyst with Birr, Wilson and Co.\n" + + "He noted, however, that any potential losses would not show up in the current quarter.\n" + + "\n" + + "The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions.\n" + + "The FDIC said it subsidized the merger of Central Bank and Trust Co, Glenmora, La., with the healthy Peoples Bank and Trust Co, Natchitoches, La., after state regulators notified it that Central was in danger of failing.\n" + + "Central had assets of 28.3 mln dlrs.\n" + + "The FDIC said the deposits of the failed Farmers State Bank, Hart, Tex., were assumed by Hale County State Bank, Plainview, Tex.\n" + + "Farmers, with 9.6 mln dlrs in assets, was closed by Texas bank regulators.\n" + + "The deposits of the failed First National Bank of Crosby, Crosby, Tex., with total assets of 8.2 mln dlrs, were assumed by Central Bancshares of the South Inc, Birmingham, Ala., after First National was closed by federal bank regulators, the FDIC said.\n" + + "Brazil's 14-bank advisory committee expressed \"grave concern\" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments, according to a telex from committee chairman Citibank to creditor banks worldwide.\n" + + "Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its 
subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines.\n" + + "Seixas, director of the Brazilian central bank's foreign debt department, met the full panel on Tuesday and Wednesday.\n" + + "Seixas, who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc, told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits.\n" + + "The telex could be sent to creditors as early as today, bankers said.\n" + + "Despite the rising tempers, bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday.\n" + + "Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker, Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend.\n" + + ""; + + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPTokenizerFactory.java lucene_solr_4_1-org/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPTokenizerFactory.java *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPTokenizerFactory.java Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test/org/apache/solr/analysis/TestOpenNLPTokenizerFactory.java Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,120 ---- + /** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.solr.analysis; + + import java.io.IOException; + import java.io.StringReader; + import java.util.HashMap; + import java.util.Map; + + import org.apache.lucene.analysis.BaseTokenStreamTestCase; + import org.apache.lucene.analysis.Tokenizer; + import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + import org.apache.solr.SolrTestCaseJ4; + import org.junit.BeforeClass; + import org.junit.Test; + + /** + * Tests the Tokenizer as well- the Tokenizer needs the OpenNLP model files, + * which this can load from src/test-files/opennlp/solr/conf + * + */ + + public class TestOpenNLPTokenizerFactory extends SolrTestCaseJ4 { + + static String SENTENCES = "Sentence number 1 has 6 words. 
Sentence number 2, 5 words."; + static String[] SENTENCES_split = {"Sentence number 1 has 6 words.", "Sentence number 2, 5 words."}; + static String[] SENTENCES_punc = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."}; + static String[] SENTENCES_nopunc = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."}; + static int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57}; + static int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58}; + + final boolean doSentences = false; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema.xml", getFile("opennlp/solr").getAbsolutePath()); + } + + @Test + public void testTokenizer() throws IOException { + OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(); + Map args = new HashMap(); + args.put("sentenceModel", getFullPath("opennlp/en-test-sent.bin")); + args.put("tokenizerModel", getFullPath("opennlp/en-test-tokenizer.bin")); + factory.init(args); + factory.inform(new StreamMockSolrResourceLoader()); + + StringReader inputReader = new StringReader(SENTENCES); + Tokenizer ts = factory.create(inputReader); + walkTerms(ts, "none", SENTENCES_punc); + ts.reset(); + walkTerms(ts, "none", SENTENCES_punc); + ts.reset(); + BaseTokenStreamTestCase.assertTokenStreamContents(ts, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets); + } + + @Test + public void testTokenizerNoSentenceDetector() throws IOException { + OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(); + Map args = new HashMap(); + args.put("tokenizerModel", getFullPath("opennlp/en-test-tokenizer.bin")); + factory.init(args); + factory.inform(new StreamMockSolrResourceLoader()); + + StringReader inputReader = new StringReader(SENTENCES); + Tokenizer ts = factory.create(inputReader); + walkTerms(ts, "none", SENTENCES_punc); 
+ ts.reset(); + walkTerms(ts, "none", SENTENCES_punc); + BaseTokenStreamTestCase.assertTokenStreamContents(ts, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets); + } + + @Test + public void testTokenizerNoTokenizer() throws IOException { + OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(); + Map args = new HashMap(); + args.put("sentenceModel", getFullPath("opennlp/en-test-sent.bin")); + factory.init(args); + factory.inform(new StreamMockSolrResourceLoader()); + + StringReader inputReader = new StringReader(SENTENCES); + Tokenizer ts = factory.create(inputReader); + + walkTerms(ts, "none", SENTENCES_split); + ts.reset(); + walkTerms(ts, "none", SENTENCES_split); + } + + void walkTerms(Tokenizer ts, String op, String[] terms) throws IOException { + int term = 0; + while (ts.incrementToken()) { + CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class); + String word = new String(termAtt.buffer()); + word = word.substring(termAtt.length()); + word = termAtt.toString(); + assertEquals("", terms[term], word); + term++; + } + assertEquals( "# of terms in paragraph", terms.length, term); + } + + private String getFullPath(String path) throws IOException { + String home = System.getProperty("solr.solr.home"); + return home + "/collection1/conf/" + path; + } + } diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/.gitignore lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/.gitignore *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/.gitignore Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/.gitignore Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + *.bin diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/README.txt 
lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/README.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/README.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/opennlp/README.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,2 ---- + Test versions of OpenNLP models. + Only for unit tests, not production-quality. diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/schema.xml lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/schema.xml *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/schema.xml Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/schema.xml Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,427 ---- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + id + + + text + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/solrconfig.xml lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/solrconfig.xml *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/solrconfig.xml Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/opennlp/solr/collection1/conf/solrconfig.xml Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,69 ---- + + + + + + + + + + ${solr.data.dir:./solr/data} + + + + + 
${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + explicit + 10 + + + + + true + + + + + + + + + + max-age=30, public + + + + + + lang_id + + + + + +
diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/README.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/README.txt
*** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/README.txt Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/README.txt Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,6 ----
+ Use small training data to create small models for unit tests.
+ Training data derived from Reuters corpus in very unscientific way.
+ Tagging done with CCG Urbana-Champaign online demos:
+ http://cogcomp.cs.illinois.edu/page/demos
+
+ run bin/trainall.sh from this directory.
diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/bin/trainall.sh lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/bin/trainall.sh
*** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/bin/trainall.sh Thu Jan 1 01:00:00 1970
--- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/bin/trainall.sh Wed Feb 27 11:29:43 2013
***************
*** 0 ****
--- 1,29 ----
+ #!/usr/bin/env bash
+
+ # Train all small test models for unit tests. Run as bin/trainall.sh from src/test-files/training: every path below is relative to that directory.
+ # This does not create real NLP models, just small unencumbered ones for the unit tests.
+ # All text taken from Reuters corpus.
+ # Tags applied with online demos at CCG Urbana-Champaign.
+
+ # http://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.sentdetect.training
+
+ java -cp "../../../lib/*" opennlp.tools.cmdline.CLI SentenceDetectorTrainer -lang en -data sentences.txt -model ../opennlp/solr/collection1/conf/opennlp/en-test-sent.bin
+
+ # http://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.tokenizer.training
+
+ java -cp "../../../lib/*" opennlp.tools.cmdline.CLI TokenizerTrainer -lang en -data tokenizer.txt -model ../opennlp/solr/collection1/conf/opennlp/en-test-tokenizer.bin
+
+ # http://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.postagger.training
+
+ java -cp "../../../lib/*" opennlp.tools.cmdline.CLI POSTaggerTrainer -lang en -data pos.txt -model ../opennlp/solr/collection1/conf/opennlp/en-test-pos-maxent.bin
+
+ # http://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.chunking.training
+
+ java -cp "../../../lib/*" opennlp.tools.cmdline.CLI ChunkerTrainerME -lang en -data chunks.txt -model ../opennlp/solr/collection1/conf/opennlp/en-test-chunker.bin
+
+ # http://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind.training
+ # maxent won't work on small training set. use perceptron, train on one word.
+ + RUN="-iterations 50 -cutoff 1" + java -cp "../../../lib/*" opennlp.tools.cmdline.CLI TokenNameFinderTrainer -params ner/TrainerParams.txt -type person $RUN -lang en -data ner/ner_flashman.txt -model ../opennlp/solr/collection1/conf/opennlp/en-test-ner-person.bin + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/chunker.csv lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/chunker.csv *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/chunker.csv Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/chunker.csv Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + Showers,NNS continued,VBD throughout,IN the,DT week,NN in,IN the,DT Bahia,NNP cocoa,NN zone,NN , alleviating,VBG the,DT drought,NN since,IN early,JJ January,NNP and,CC improving,VBG prospects,NNS for,IN the,DT coming,VBG temporao,NN , although,IN normal,JJ humidity,NN levels,NNS have,VBP not,RB been,VBN restored,VBN , Comissaria,NNP Smith,NNP said,VBD in,IN PRP$its, weekly,JJ review,NN .,. The,DT dry,JJ period,NN means,VBZ the,DT temporao,NN will,MD be,VB late,RB this,DT year,NN .,. Arrivals,NNS for,IN the,DT week,NN ended,VBN February,NNP 22,CD were,VBD 155, bags,NNS of,IN 60,CD kilos,NN making,VBG a,DT cumulative,JJ total,NN for,IN the,DT season,NN of,IN 5.93,CD mln,NN against,IN 5.81,CD at,IN the,DT same,JJ stage,NN last,JJ year,NN .,. Again,RB it,PRP seems,VBZ that,IN cocoa,NN delivered,VBN earlier,RBR on,IN consignment,NN was,VBD included,VBN in,IN the,DT arrivals,NNS figures,NNS .,. Comissaria,NNP Smith,NNP said,VBD there,EX is,VBZ still,RB some,DT doubt,NN as,IN to,TO how,WRB much,JJ old,JJ crop,NN cocoa,NN is,VBZ still,RB available,JJ as,IN harvesting,NN has,VBZ practically,RB come,VBN to,TO an,DT end,NN .,. 
With,IN total,JJ Bahia,NNP crop,NN estimates,NNS around,IN 6.4,CD mln,NN bags,NNS and,CC sales,NNS standing,VBG at,IN almost,RB 6.2,CD mln,NN there,EX are,VBP a,DT few,JJ hundred,CD thousand,CD bags,NNS still,RB in,IN the,DT hands,NNS of,IN farmers,NNS , middlemen,NNS , exporters,NNS and,CC processors,NNS .,. There,EX are,VBP doubts,NNS as,IN to,TO how,WRB much,RB of,IN this,DT cocoa,NN would,MD be,VB fit,NN for,IN export,NN as,IN shippers,NNS are,VBP now,RB experiencing,VBG dificulties,NNS in,IN obtaining,VBG +/NN/Bahia,NN superior/NN/+,JJ certificates,NNS .,. In,IN view,NN of,IN the,DT lower,JJR quality,NN over,IN recent,JJ weeks,NNS farmers,NNS have,VBP sold,VBN a,DT good,JJ part,NN of,IN PRP$their, cocoa,NN held,VBN on,IN consignment,NN .,. Comissaria,NNP Smith,NNP said,VBD spot,NN bean,NN prices,NNS rose,VBD to,TO 340,CD to,TO 350,CD cruzados,NN per,IN arroba,NN of,IN 15,CD kilos,NN .,. Bean,NNP shippers,NNS were,VBD reluctant,JJ to,TO offer,VB nearby,JJ shipment,NN and,CC only,RB limited,JJ sales,NNS were,VBD booked,VBN for,IN March,NNP shipment,NN at,IN 1, to,TO 1, dlrs,NN per,IN tonne,NN to,TO ports,NNS to,TO be,VB named,VBN .,. New,JJ crop,NN sales,NNS were,VBD also,RB light,JJ and,CC all,DT to,TO open,JJ ports,NNS with,IN going,VBG at,IN 1, and,CC 1, dlrs,NN and,CC at,IN 35,CD and,CC 45,CD dlrs,NN under,IN New,NNP York,NNP july,NN , at,IN 1, , 1, and,CC 1, dlrs,NN per,IN tonne,NN FOB,NNP .,. Routine,JJ sales,NNS of,IN butter,NN were,VBD made,VBN sold,VBD at,IN 4, , 4, and,CC 4, dlrs,NN .,. butter,NN went,VBD at,IN 2.27,CD times,NNS New,NNP York,NNP May,NNP , at,IN 4, and,CC 4, dlrs,NN , at,IN 4, to,TO 4, dlrs,NN and,CC at,IN 2.27,CD and,CC 2.28,CD times,NNS New,NNP York,NNP Sept,NNP and,CC at,IN 4, dlrs,NN and,CC 2.27,CD times,NNS New,NNP York,NNP Dec,NNP , Comissaria,NNP Smith,NNP said,VBD .,. Destinations,NNS were,VBD the,DT U.S.,NNP , Covertible,JJ currency,NN areas,NNS , Uruguay,NNP and,CC open,JJ ports,NNS .,. 
Cake,NNP sales,NNS were,VBD registered,VBN at,IN 785,CD to,TO 995,CD dlrs,NN for,IN , 785,CD dlrs,NN for,IN May,NNP , 753,CD dlrs,NN for,IN Aug,NNP and,CC 0.39,CD times,NNS New,NNP York,NNP Dec,NNP for,IN .,. Buyers,NNS were,VBD the,DT U.S.,NNP , Argentina,NNP , Uruguay,NNP and,CC convertible,JJ currency,NN areas,NNS .,. Liquor,NNP sales,NNS were,VBD limited,VBN with,IN selling,VBG at,IN 2, and,CC 2, dlrs,NN , at,IN 2, dlrs,NN and,CC at,IN 1.25,CD times,NNS New,NNP York,NNP July,NNP , at,IN 2, dlrs,NN and,CC at,IN 1.25,CD times,NNS New,NNP York,NNP Sept,NNP and,CC at,IN 1.25,CD times,NNS New,NNP York,NNP Dec,NNP , Comissaria,NNP Smith,NNP said,VBD .,. Total,JJ Bahia,NN sales,NNS are,VBP currently,RB estimated,VBN at,IN 6.13,CD mln,NN bags,NNS against,IN the,DT 1986/87,NN crop,NN and,CC 1.06,CD mln,NN bags,NNS against,IN the,DT 1987/88,NN crop,NN .,. Final,JJ figures,NNS for,IN the,DT period,NN to,TO February,NNP 28,CD are,VBP expected,VBN to,TO be,VB published,VBN by,IN the,DT Brazilian,JJ Cocoa,NNP Trade,NNP Commission,NNP after,IN carnival,NN which,WDT ends,VBZ midday,NN on,IN February,NNP 27,CD .,. \ No newline at end of file diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/chunks.csv lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/chunks.csv *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/chunks.csv Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/chunks.csv Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + Showers,NNS continued,VBD throughout,IN the,DT week,NN in,IN the,DT Bahia,NNP cocoa,NN zone,NN , alleviating,VBG the,DT drought,NN since,IN early,JJ January,NNP and,CC improving,VBG prospects,NNS for,IN the,DT coming,VBG temporao,NN , although,IN normal,JJ humidity,NN levels,NNS have,VBP not,RB been,VBN restored,VBN , Comissaria,NNP Smith,NNP said,VBD in,IN PRP$its, weekly,JJ review,NN .,. 
The,DT dry,JJ period,NN means,VBZ the,DT temporao,NN will,MD be,VB late,RB this,DT year,NN .,. Arrivals,NNS for,IN the,DT week,NN ended,VBN February,NNP 22,CD were,VBD 155, bags,NNS of,IN 60,CD kilos,NN making,VBG a,DT cumulative,JJ total,NN for,IN the,DT season,NN of,IN 5.93,CD mln,NN against,IN 5.81,CD at,IN the,DT same,JJ stage,NN last,JJ year,NN .,. Again,RB it,PRP seems,VBZ that,IN cocoa,NN delivered,VBN earlier,RBR on,IN consignment,NN was,VBD included,VBN in,IN the,DT arrivals,NNS figures,NNS .,. Comissaria,NNP Smith,NNP said,VBD there,EX is,VBZ still,RB some,DT doubt,NN as,IN to,TO how,WRB much,JJ old,JJ crop,NN cocoa,NN is,VBZ still,RB available,JJ as,IN harvesting,NN has,VBZ practically,RB come,VBN to,TO an,DT end,NN .,. With,IN total,JJ Bahia,NNP crop,NN estimates,NNS around,IN 6.4,CD mln,NN bags,NNS and,CC sales,NNS standing,VBG at,IN almost,RB 6.2,CD mln,NN there,EX are,VBP a,DT few,JJ hundred,CD thousand,CD bags,NNS still,RB in,IN the,DT hands,NNS of,IN farmers,NNS , middlemen,NNS , exporters,NNS and,CC processors,NNS .,. There,EX are,VBP doubts,NNS as,IN to,TO how,WRB much,RB of,IN this,DT cocoa,NN would,MD be,VB fit,NN for,IN export,NN as,IN shippers,NNS are,VBP now,RB experiencing,VBG dificulties,NNS in,IN obtaining,VBG +/NN/Bahia,NN superior/NN/+,JJ certificates,NNS .,. In,IN view,NN of,IN the,DT lower,JJR quality,NN over,IN recent,JJ weeks,NNS farmers,NNS have,VBP sold,VBN a,DT good,JJ part,NN of,IN PRP$their, cocoa,NN held,VBN on,IN consignment,NN .,. Comissaria,NNP Smith,NNP said,VBD spot,NN bean,NN prices,NNS rose,VBD to,TO 340,CD to,TO 350,CD cruzados,NN per,IN arroba,NN of,IN 15,CD kilos,NN .,. Bean,NNP shippers,NNS were,VBD reluctant,JJ to,TO offer,VB nearby,JJ shipment,NN and,CC only,RB limited,JJ sales,NNS were,VBD booked,VBN for,IN March,NNP shipment,NN at,IN 1, to,TO 1, dlrs,NN per,IN tonne,NN to,TO ports,NNS to,TO be,VB named,VBN .,. 
New,JJ crop,NN sales,NNS were,VBD also,RB light,JJ and,CC all,DT to,TO open,JJ ports,NNS with,IN going,VBG at,IN 1, and,CC 1, dlrs,NN and,CC at,IN 35,CD and,CC 45,CD dlrs,NN under,IN New,NNP York,NNP july,NN , at,IN 1, , 1, and,CC 1, dlrs,NN per,IN tonne,NN FOB,NNP .,. Routine,JJ sales,NNS of,IN butter,NN were,VBD made,VBN sold,VBD at,IN 4, , 4, and,CC 4, dlrs,NN .,. butter,NN went,VBD at,IN 2.27,CD times,NNS New,NNP York,NNP May,NNP , at,IN 4, and,CC 4, dlrs,NN , at,IN 4, to,TO 4, dlrs,NN and,CC at,IN 2.27,CD and,CC 2.28,CD times,NNS New,NNP York,NNP Sept,NNP and,CC at,IN 4, dlrs,NN and,CC 2.27,CD times,NNS New,NNP York,NNP Dec,NNP , Comissaria,NNP Smith,NNP said,VBD .,. Destinations,NNS were,VBD the,DT U.S.,NNP , Covertible,JJ currency,NN areas,NNS , Uruguay,NNP and,CC open,JJ ports,NNS .,. Cake,NNP sales,NNS were,VBD registered,VBN at,IN 785,CD to,TO 995,CD dlrs,NN for,IN , 785,CD dlrs,NN for,IN May,NNP , 753,CD dlrs,NN for,IN Aug,NNP and,CC 0.39,CD times,NNS New,NNP York,NNP Dec,NNP for,IN .,. Buyers,NNS were,VBD the,DT U.S.,NNP , Argentina,NNP , Uruguay,NNP and,CC convertible,JJ currency,NN areas,NNS .,. Liquor,NNP sales,NNS were,VBD limited,VBN with,IN selling,VBG at,IN 2, and,CC 2, dlrs,NN , at,IN 2, dlrs,NN and,CC at,IN 1.25,CD times,NNS New,NNP York,NNP July,NNP , at,IN 2, dlrs,NN and,CC at,IN 1.25,CD times,NNS New,NNP York,NNP Sept,NNP and,CC at,IN 1.25,CD times,NNS New,NNP York,NNP Dec,NNP , Comissaria,NNP Smith,NNP said,VBD .,. Total,JJ Bahia,NN sales,NNS are,VBP currently,RB estimated,VBN at,IN 6.13,CD mln,NN bags,NNS against,IN the,DT 1986/87,NN crop,NN and,CC 1.06,CD mln,NN bags,NNS against,IN the,DT 1987/88,NN crop,NN .,. Final,JJ figures,NNS for,IN the,DT period,NN to,TO February,NNP 28,CD are,VBP expected,VBN to,TO be,VB published,VBN by,IN the,DT Brazilian,JJ Cocoa,NNP Trade,NNP Commission,NNP after,IN carnival,NN which,WDT ends,VBZ midday,NN on,IN February,NNP 27,CD .,. 
\ No newline at end of file diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/chunks.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/chunks.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/chunks.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/chunks.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,3552 ---- + Iran NNP B-NP + announced VBD B-VP + tonight NN B-NP + that IN B-PP + its NNS B-NP + major JJ B-NP + offensive NN I-NP + against IN B-PP + Iraq NNP B-NP + in IN B-PP + the DT B-NP + Gulf NNP I-NP + war NN I-NP + had VBD B-VP + ended VBN I-VP + after IN B-PP + dealing VBG B-VP + savage JJ B-NP + blows NNS I-NP + against IN B-PP + the DT B-NP + Baghdad NNP I-NP + government NN I-NP + . . O + The DT B-NP + Iranian JJ I-NP + news NN I-NP + agency NN I-NP + IRNA NNP I-NP + , , O + in IN B-PP + a DT B-NP + report NN I-NP + received VBN B-VP + in IN B-PP + London NNP B-NP + , , O + said VBD B-VP + the DT B-NP + operation NN I-NP + code-named VBN B-VP + Karbala-5 CD B-NP + launched VBD B-VP + into IN B-PP + Iraq NNP B-NP + on IN B-PP + January NNP B-NP + 9 CD I-NP + was VBD B-VP + now RB B-ADVP + over RP B-NP + . . O + It PRP B-NP + quoted VBD B-VP + a DT B-NP + joint NN I-NP + statewment NN I-NP + by IN B-PP + the DT B-NP + Iranian JJ I-NP + Army NNP I-NP + and CC I-NP + Revolutionary NNP I-NP + Guards NNPS I-NP + Corps NNP I-NP + as IN B-PP + saying VBG B-VP + that IN B-SBAR + their DT B-NP + forces NNS I-NP + had VBD B-VP + " JJ B-NP + dealt VBD B-VP + one CD B-NP + of IN B-PP + the DT B-NP + severest JJS I-NP + blows NNS I-NP + on IN B-PP + the DT B-NP + Iraqi JJ I-NP + war NN I-NP + machine NN I-NP + in IN B-PP + the DT B-NP + history NN I-NP + of IN B-PP + the DT B-NP + Iraq-imposed JJ I-NP + war NN I-NP + . . 
O + " NN B-VP + The DT B-NP + statement NN I-NP + by IN B-PP + the DT B-NP + Iranian JJ I-NP + High NNP I-NP + Command NNP I-NP + appeared VBD B-VP + to TO I-VP + herald VB I-VP + the DT B-NP + close NN I-NP + of IN B-PP + an DT B-NP + assault NN I-NP + on IN B-PP + the DT B-NP + port JJ I-NP + city NN I-NP + of IN B-PP + Basra NNP B-NP + in IN B-PP + southern JJ B-NP + Iraq NNP I-NP + . . O + " NN B-VP + The DT B-NP + operation NN I-NP + was VBD B-VP + launched VBN I-VP + at IN B-PP + a DT B-NP + time NN I-NP + when WRB B-ADVP + the DT B-NP + Baghdad NNP I-NP + government NN I-NP + was VBD B-VP + spreading VBG I-VP + extensive JJ B-NP + propaganda NN I-NP + on IN B-PP + the DT B-NP + resistance NN I-NP + power NN I-NP + of IN B-PP + its NNS B-NP + army NN I-NP + ... NNS I-NP + , , O + " NNS B-NP + said VBD B-VP + the DT B-NP + statement NN I-NP + quoted VBN B-VP + by IN B-PP + IRNA NNP B-NP + . . O + It PRP B-NP + claimed VBD B-VP + massive JJ B-NP + victories NNS I-NP + in IN B-PP + the DT B-NP + seven-week NN I-NP + offensive NN I-NP + and CC O + called VBN B-VP + on IN B-PP + supporters NNS B-NP + of IN B-SBAR + Baghdad NNP B-NP + to TO B-VP + " VB I-VP + come VBN I-VP + to TO B-PP + their IN B-NP + senses JJ I-NP + " NNS I-NP + and CC O + discontinue VB B-VP + support NN B-NP + for IN B-PP + what WP B-NP + it PRP B-NP + called VBD B-VP + the DT B-NP + tottering VBG I-NP + regime NN I-NP + in IN B-PP + Iraq NNP B-NP + . . 
I-NP + Iran NNP I-NP + said VBD B-VP + its NNS B-NP + forces NNS I-NP + had VBD B-VP + " CD B-NP + liberated JJ I-NP + " NN I-NP + 155 CD I-NP + square JJ I-NP + kilometers NNS I-NP + of IN B-PP + enemy-occupied JJ-occupied B-NP + territory NN I-NP + during IN B-PP + the DT B-NP + 1987 CD I-NP + offensive NN I-NP + and CC O + taken VBN B-VP + over IN B-PP + islands NNS B-NP + , , O + townships NNS B-NP + , , O + rivers NNS B-NP + and CC O + part NN B-NP + of IN B-PP + a DT B-NP + road NN I-NP + leading VBG B-VP + into IN B-PP + Basra NNP B-NP + . . O + The DT B-NP + Iranian JJ I-NP + forces NNS I-NP + " NNS I-NP + are VBP B-VP + in IN B-PP + full JJ B-NP + control NN I-NP + of IN B-PP + these DT B-NP + areas NNS I-NP + , , O + " NNS B-NP + the DT B-NP + statement NN I-NP + said VBD B-VP + . . O + It PRP B-NP + said VBD B-VP + 81 CD B-NP + Iraqi JJ I-NP + brigades NNS I-NP + and CC I-NP + battalions NNS I-NP + were VBD B-VP + totally RB I-VP + destroyed VBN I-VP + , , O + along IN B-ADVP + with IN B-PP + 700 CD B-NP + tanks NNS I-NP + and CC O + 1,500 CD B-NP + other JJ I-NP + vehicles NNS I-NP + . . O + The DT B-NP + victory NN I-NP + list NN I-NP + also RB B-ADVP + included VBD B-VP + 80 CD B-NP + warplanes NNS I-NP + downed VBD B-VP + , , O + 250 CD B-NP + anti- - I-NP + aircraft NN I-NP + guns NNS I-NP + and CC O + 400 CD B-NP + pieces NNS I-NP + of IN B-PP + military JJ B-NP + hardware NN I-NP + destroyed VBN B-VP + and CC O + the DT B-NP + seizure NN I-NP + of IN B-PP + 220 CD B-NP + tanks NNS I-NP + and CC O + armoured JJ B-NP + personnel NNS I-NP + carriers NNS I-NP + . . O + U.S. NNP O + bank NN I-NP + discount NN I-NP + window RB I-NP + borrowings NNS I-NP + less NNS I-NP + extended VBN B-NP + credits NN I-NP + averaged VBD B-VP + 310 CD B-NP + mln NN I-NP + dlrs NN I-NP + in IN B-PP + the DT B-NP + week NN I-NP + to TO B-PP + Wednesday NNP B-NP + February NNP I-NP + 25 CD I-NP + , , O + the DT B-NP + Federal JJ I-NP + Reserve NNP I-NP + said VBD B-VP + . 
. O + The DT B-NP + Fed JJ I-NP + said VBD B-VP + that IN B-SBAR + overall JJ B-NP + borrowings NNS I-NP + in IN B-PP + the DT B-NP + week NN I-NP + fell MD B-VP + 131 CD B-NP + mln NN I-NP + dlrs NN I-NP + to TO B-PP + 614 CD B-NP + mln NN I-NP + dlrs NN I-NP + , , O + with IN B-PP + extended VBN B-NP + credits NN I-NP + up IN B-PP + 10 CD B-NP + mln NN I-NP + dlrs NN I-NP + at IN B-PP + 304 CD B-NP + mln NN I-NP + dlrs NN I-NP + . . O + The DT B-NP + week NN I-NP + was VBD B-VP + the DT B-NP + second NN I-NP + half NN I-NP + of IN B-PP + a DT B-NP + two-week NN I-NP + statement NN I-NP + period. NNS I-NP + Net VBD B-VP + borrowings NNS B-NP + in IN B-PP + the DT B-NP + prior NN I-NP + week NN I-NP + averaged RB B-NP + 451 CD I-NP + mln NN I-NP + dlrs NN I-NP + . . O + Commenting NNP O + on IN B-PP + the DT B-NP + two-week NN I-NP + statement NN I-NP + period NNS I-NP + ended VBD B-VP + February NNP B-NP + 25 CD I-NP + , , O + the DT B-NP + Fed NNP I-NP + said VBD B-VP + that NN B-SBAR + banks NNS B-NP + had VBD B-VP + average JJ B-NP + net NN I-NP + free JJ I-NP + reserves NN I-NP + of IN B-PP + 644 CD B-NP + mln NN I-NP + dlrs NN I-NP + a DT B-NP + day NN I-NP + , , O + down IN B-PP + from JJ B-NP + 1.34 NN I-NP + billion NN I-NP + two RB B-NP + weeks NNS I-NP + earlier IN B-ADVP + . . O + A RB B-ADJP + Federal JJ I-ADJP + Reserve . B-NP + spokesman NN B-VP + told VBN I-VP + a DT B-NP + press NN I-NP + briefing VBG B-VP + that IN B-SBAR + there EX B-NP + were VBD B-VP + no RB B-NP + large JJ I-NP + single NN I-NP + day NN I-NP + net RB I-NP + misses NNS I-NP + in IN B-PP + the DT B-NP + Fed's default I-NP + reserve NN I-NP + projections NNS I-NP + in IN B-PP + the DT B-NP + week NN I-NP + to TO B-PP + Wednesday NNP B-NP + . . 
I-NP + He NNP I-NP + said VBD B-VP + that NN B-NP + natural JJ I-NP + float NN I-NP + had VBD B-VP + been VBN I-VP + " NN B-NP + acting VBG B-VP + a DT B-NP + bit NN I-NP + strangely RB B-VP + " VBN I-VP + for IN B-PP + this DT B-NP + time NN I-NP + of IN B-PP + year NN B-NP + , , O + noting VBG B-VP + that IN B-SBAR + there EX B-NP + had VBD B-VP + been VBN I-VP + poor JJ B-NP + weather NN I-NP + during IN B-PP + the DT B-NP + latest JJ I-NP + week NN I-NP + . . O + The DT B-NP + spokesman NN I-NP + said VBD B-VP + that IN B-SBAR + natural JJ B-NP + float NN I-NP + ranged VBN B-VP + from IN B-PP + under IN B-NP + 500 CD I-NP + mln NN I-NP + dlrs NN I-NP + on IN B-PP + Friday NNP B-NP + , , O + for IN B-PP + which NNP B-NP + he NN B-NP + could VBN B-VP + give JJ B-NP + no RB I-NP + reason NN I-NP + , , O + to TO B-PP + nearly JJ B-NP + one CD I-NP + billion IN B-PP + dlrs NN B-NP + on IN B-PP + both NN B-NP + Thursday default B-NP + and CC O + Wednesday default B-NP + . . O + The DT B-NP + Fed JJ I-NP + spokeman NN I-NP + could VBN B-VP + give JJ B-NP + no NN I-NP + reason NN I-NP + for IN B-PP + Thursday's NNP B-NP + high NN I-NP + float NNS I-NP + , , O + but NNS B-NP + he DT B-NP + said VBD B-VP + that IN B-PP + about NN B-NP + 750 CD I-NP + mln NN I-NP + dlrs NN I-NP + of IN B-PP + Wednesday's NNP B-NP + float NN I-NP + figure NNS I-NP + was VBD B-VP + due VBD I-VP + to TO I-VP + holdover VB I-VP + and CC O + transportation NN B-VP + float IN B-PRT + at IN B-PP + two NN B-NP + widely WDT I-NP + separated VBN B-VP + Fed VBN B-NP + districts NNS I-NP + . . 
O + For NNP O + the DT B-NP + week NN I-NP + as IN B-PP + a DT B-NP + whole NN I-NP + , , O + he DT B-NP + said VBD B-VP + that IN B-SBAR + float NN B-NP + related VBN B-VP + as IN B-PP + of NNP B-NP + adjustments NNS I-NP + were VBD B-VP + " RB B-ADJP + small JJ I-ADJP + , , O + " IN B-PP + adding VBG B-VP + that IN B-SBAR + they NN B-NP + fell NN I-NP + to TO B-PP + a DT B-NP + negative JJ I-NP + 750 CD I-NP + mln NN I-NP + dlrs NN I-NP + on IN B-PP + Tuesday NNP B-NP + due NN I-NP + to TO B-PP + a DT B-NP + number NN I-NP + of IN B-PP + corrections NN B-NP + for IN B-PP + unrelated VBN B-NP + cash NN I-NP + letter IN B-PP + errors NNS B-NP + in IN B-PP + six JJ B-NP + districts NNS I-NP + around IN B-PP + the DT B-NP + country NN I-NP + . . O + The DT B-NP + spokesman NN I-NP + said VBD B-VP + that NN B-NP + on IN B-PP + both JJ B-NP + Tuesday NNP I-NP + and CC I-NP + Wednesday NNP B-NP + , , O + two IN B-PP + different JJ B-NP + clearing NN I-NP + banks NNS I-NP + had VBD B-VP + system JJ B-NP + problems NNS I-NP + and CC O + the DT B-NP + securities NNS I-NP + and CC I-NP + Federal JJ I-NP + funds NNS I-NP + wires NNS I-NP + had VBD B-VP + to TO I-VP + be VB I-VP + held VBN I-VP + open JJ B-NP + until NNS I-NP + about IN B-PP + 2000 CD B-NP + or NNP I-NP + 2100 CD I-NP + EST NNS I-NP + on IN B-PP + both JJ B-NP + days NN I-NP + . . O + However NNP B-NP + , , O + he CD B-NP + said VBD B-VP + that IN B-SBAR + both NNP B-NP + problems NN I-NP + were VBD B-VP + cleared VBN I-VP + up IN B-ADVP + during VBG B-VP + both IN B-PP + afternoons NNS B-NP + and CC O + there DT B-NP + was VBD B-VP + no RB B-ADJP + evidence JJ I-ADJP + of IN B-PP + any DT B-NP + reserve JJ I-NP + impact NN I-NP + . . 
O + During VBG B-VP + the DT B-NP + week NN I-NP + ended VBN B-VP + Wednesday NNP B-NP + , , O + 45 CD B-NP + pct NN I-NP + of IN B-PP + net JJ B-NP + discount NN I-NP + window NN I-NP + borrowings NNS I-NP + were VBD B-VP + made JJ B-ADJP + by IN B-PP + the DT B-NP + smallest NN I-NP + banks NNS I-NP + , , O + with IN B-PP + 30 CD B-NP + pct NN I-NP + by IN B-PP + the DT B-NP + 14 CD I-NP + large RB I-NP + money JJ I-NP + center NN I-NP + banks NNS I-NP + and CC O + 25 CD B-NP + pct NN I-NP + by IN B-PP + large JJ B-NP + regional NN I-NP + institutions NNS I-NP + . . O + On NNP B-NP + Wednesday NNP I-NP + , , O + 55 CD B-NP + pct NN I-NP + of IN B-PP + the DT B-NP + borrowing NN I-NP + was VBD B-VP + accounted VBN I-VP + for IN B-PP + by IN B-PP + the DT B-NP + money NN I-NP + center NN I-NP + banks NNS I-NP + , , O + with IN B-PP + 30 CD B-NP + pct NN I-NP + by IN B-PP + the DT B-NP + large JJ I-NP + regionals NN I-NP + and CC O + 15 CD B-NP + pct NN I-NP + by IN B-PP + the DT B-NP + smallest JJ I-NP + banks NNS I-NP + . . O + The DT B-NP + Fed JJ I-NP + spokesman NN I-NP + said VBD B-VP + the DT B-NP + banking NN I-NP + system IN B-NP + had VBD B-VP + excess VBZ B-NP + reserves NN I-NP + on IN B-PP + Thursday NNP B-NP + , , O + Monday NNP B-NP + and CC I-NP + Tuesday NNP I-NP + and CC O + a DT B-NP + deficit NN I-NP + on IN B-PP + Friday NNP B-NP + and CC O + Wedndsday NNP B-NP + . . I-NP + That NNP I-NP + produced VBD B-VP + a DT B-NP + small JJ I-NP + daily NN I-NP + average JJ I-NP + deficit NN I-NP + for IN B-PP + the DT B-NP + week NN I-NP + as IN B-PP + a DT B-NP + whole NN I-NP + . . 
B-VP + For NNP B-PP + the DT B-NP + two-week NN I-NP + period NNS I-NP + , , O + he NNS B-NP + said VBD B-VP + there EX B-NP + were VBD B-VP + relatively JJ B-NP + high NN I-NP + excess VBZ B-VP + reserves NN B-NP + on IN B-PP + a DT B-NP + daily JJ I-NP + avearge NN I-NP + , , O + almost IN B-PP + all DT B-NP + of IN B-PP + which CD B-NP + were VBD B-VP + at IN B-PP + the DT B-NP + smallest JJ I-NP + banks NNS I-NP + . . O + Reuter IN B-PP +  CD B-NP + American RB I-NP + Express JJ I-NP + Co NNP I-NP + remained VBN I-NP + silent NN I-NP + on IN B-PP + market NN B-NP + rumors NN I-NP + it PRP B-NP + would VBD B-VP + spinoff IN B-PP + all DT B-NP + or JJ I-NP + part NN I-NP + of IN B-PP + its NNS B-NP + Shearson NNP I-NP + Lehman NNP I-NP + Brothers NNS I-NP + Inc NNP I-NP + , , O + but IN B-SBAR + some DT B-NP + analysts NNS I-NP + said VBD B-VP + the DT B-NP + company NN I-NP + may NN I-NP + be VB B-VP + considering NN B-NP + such IN B-PP + a DT B-NP + move JJ I-NP + because NN I-NP + it PRP B-NP + is VBZ B-VP + unhappy NN B-NP + with IN B-PP + the DT B-NP + market JJ I-NP + value NN I-NP + of IN B-PP + its NNS B-NP + stock NN I-NP + . . B-ADVP + American RB B-NP + Express JJ I-NP + stock NN I-NP + got NN I-NP + a DT B-NP + lift NN I-NP + from WRB B-ADVP + the DT B-NP + rumor NN I-NP + , , O + as IN B-SBAR + the DT B-NP + market NN I-NP + calculated VBN B-VP + a DT B-NP + partially JJ I-NP + public NN I-NP + Shearson IN B-PP + may NN B-NP + command VBN B-VP + a DT B-NP + good JJ I-NP + market NN I-NP + value NN I-NP + , , O + thereby IN B-PP + boosting VBG B-VP + the DT B-NP + total JJ I-NP + value NN I-NP + of IN B-PP + American NNP B-NP + Express default I-NP + . . 
O + The DT B-NP + rumor NN I-NP + also NN I-NP + was VBD B-VP + accompanied VBN I-VP + by IN B-PP + talk NN B-NP + the DT B-NP + financial JJ I-NP + services NNS I-NP + firm IN B-PP + would JJ B-NP + split NN I-NP + its NNS I-NP + stock IN B-PP + and CC O + boost JJ B-NP + its NNS I-NP + dividend VBD B-VP + . . O + American RB O + Express VBZ B-VP + closed VBN I-VP + on IN B-PP + the DT B-NP + New JJ I-NP + York NNP I-NP + Stock NNP I-NP + Exchange VBD B-VP + at IN B-PP + 72-5/8 CD B-NP + , , O + up IN B-PP + 4-1/8 NN B-NP + on IN B-PP + heavy NN B-NP + volume default I-NP + . . B-ADVP + American RB B-ADJP + Express JJ I-ADJP + would VBD B-VP + not IN B-PP + comment NN B-NP + on IN B-PP + the DT B-NP + rumors NN I-NP + or IN B-PP + its NNS B-NP + stock NN I-NP + activity NN I-NP + . . O + Analysts NNS B-NP + said VBD B-VP + comments VBN I-VP + by IN B-PP + the DT B-NP + company NN I-NP + at IN B-PP + an DT B-NP + analysts' NN I-NP + meeting VBG B-VP + Tuesday default B-NP + helped VBN I-NP + fuel JJ B-NP + the DT I-NP + rumors NN I-NP + as IN B-PP + did NN B-NP + an DT B-NP + announcement JJ I-NP + yesterday NN I-NP + of IN B-PP + management JJ B-NP + changes NNS I-NP + . . O + At RB O + the DT B-NP + meeting VBG I-NP + , , I-NP + company NN I-NP + officials IN B-NP + said VBD B-VP + American RB B-NP + Express JJ I-NP + stock NN I-NP + is VBZ B-VP + undervalued VBN I-VP + and CC O + does NNS B-VP + not NN B-NP + fully NN I-NP + reflect NN B-VP + the DT B-NP + performance NN I-NP + of IN B-PP + Shearson NNP B-NP + , , O + according IN B-PP + to TO B-PP + analysts NNS B-NP + . . 
O + Yesterday NNP B-NP + , , O + Shearson NNP B-NP + said VBD B-VP + it PRP B-NP + was VBD B-VP + elevating VBG I-VP + its NNS B-NP + chief NNP I-NP + operating VBG I-NP + officer IN I-NP + , , O + Jeffery NNP B-NP + Lane NNP I-NP + , , O + to TO B-PP + the DT B-NP + added JJ I-NP + position NN I-NP + of IN B-PP + president NN B-NP + , , O + which IN B-NP + had VBD B-VP + been VBN I-VP + vacant NN B-NP + . . O + It PRP B-NP + also RB I-VP + created VBN I-VP + four IN B-PP + new JJ B-NP + positions NNS I-NP + for IN B-PP + chairmen NN B-NP + of IN B-PP + its NNS B-NP + operating VBG I-NP + divisions NNS I-NP + . . O + Analysts NNS B-NP + speculated VBD B-VP + a DT B-NP + partial JJ I-NP + spinoff NNP I-NP + would VBD B-VP + make NN B-NP + most NN I-NP + sense NNS I-NP + , , O + contrary JJ B-ADJP + to TO B-PP + one CD B-NP + variation NN I-NP + on IN B-PP + market JJ B-NP + rumors NN I-NP + of IN B-PP + a DT B-NP + total JJ I-NP + spinoff NNP I-NP + . . O + Some DT B-NP + analysts NNS I-NP + , , O + however NNS B-NP + , , O + disagreed VBD B-VP + that IN B-PP + any JJ B-NP + spinoff NN I-NP + of IN B-PP + Shearson NNP B-NP + would VBD B-VP + be VB I-VP + good NN B-NP + since IN B-SBAR + it PRP B-NP + is VBZ B-VP + a DT B-NP + strong VBG I-NP + profit NN I-NP + center NN I-NP + for IN B-PP + American NNP B-NP + Express NNS I-NP + , , O + contributing VBG B-VP + about IN B-NP + 20 CD I-NP + pct NN I-NP + of IN B-PP + earnings NNS B-NP + last JJ B-NP + year NN I-NP + . . O + " NN B-NP + I IN B-PP + think NN B-NP + it PRP B-NP + is VBZ B-VP + highly RB O + unlikely JJ B-NP + that NN I-NP + American RB B-NP + Express JJ I-NP + is VBZ B-VP + going VBG I-VP + to TO B-PP + sell JJ B-NP + shearson NN I-NP + , , O + " IN B-NP + said VBD B-VP + Perrin CD B-NP + Long VBG I-NP + of IN B-PP + Lipper NNP B-NP + Analytical default I-NP + . . 
O + He JJ I-VP + questioned VBD I-VP + what IN B-NP + would VBN B-VP + be VB I-VP + a DT B-NP + better NN I-NP + investment NN I-NP + than NN I-NP + " RB B-NP + a DT I-NP + very NN I-NP + profitable NN I-NP + securities NNS I-NP + firm IN B-PP + . . B-NP + " NN I-NP + Several JJ I-NP + analysts NNS I-NP + said VBD B-VP + American RB B-ADJP + Express JJ I-ADJP + is VBZ B-VP + not RB O + in IN B-PP + need JJ B-NP + of IN B-PP + cash NNP B-NP + , , O + which IN B-PP + might NN B-NP + be VB B-VP + the DT B-NP + only JJ I-NP + reason NN I-NP + to TO B-VP + sell JJ I-VP + a DT B-NP + part NN I-NP + of IN B-PP + a DT B-NP + strong NN I-NP + asset IN B-PP + . . B-NP + But JJ I-NP + others NNS I-NP + believe VBP B-VP + the DT B-NP + company NN I-NP + could VBN B-VP + very JJ B-ADVP + well RB B-ADVP + of IN B-ADVP + considered VBD B-VP + the DT B-NP + option NN I-NP + of IN B-PP + spinning VBG B-VP + out JJ B-NP + part NN I-NP + of IN B-PP + Shearson NNP B-NP + , , O + and CC O + one JJ B-NP + rumor NN I-NP + suggests NNS I-NP + selling VBG B-VP + about IN B-NP + 20 CD I-NP + pct NN I-NP + of IN B-PP + it PRP B-NP + in IN B-PP + the DT B-NP + market NN I-NP + . . O + Larry JJ O + Eckenfelder . O + of IN B-PP + Prudential-Bache DT B-NP + Securities NNS I-NP + said VBD B-VP + he DT B-NP + believes NN I-NP + American RB B-VP + Express VBN I-VP + could VBN I-VP + have VBP B-VP + considered VBN I-VP + a DT B-NP + partial JJ I-NP + spinoff NN I-NP + in IN B-PP + the DT B-NP + past NN I-NP + . . O + " IN B-PP + Shearson NNP B-NP + being NN I-NP + as IN B-PP + profitable NN B-NP + as IN B-SBAR + it PRP B-NP + is VBZ B-VP + would VBD I-VP + have VBP I-VP + fetched VBN I-VP + a DT B-NP + big NN I-NP + premium NN I-NP + in IN B-PP + the DT B-NP + market NN I-NP + place. NN I-NP + Shearson's NNP I-NP + book NN I-NP + value NN I-NP + is VBZ B-VP + in IN B-PP + the DT B-NP + 1.4 CD I-NP + mln NN I-NP + dlr IN B-PP + range NN B-NP + . . 
O + Shearson NNP O + in IN B-PP + the DT B-NP + market NN I-NP + place NN I-NP + would MD B-VP + probably RB I-VP + be VB I-VP + worth RB B-ADVP + three DT B-NP + to TO I-NP + 3.5 CD I-NP + bilion NN I-NP + dlrs NN I-NP + in IN B-PP + terms NN B-NP + of IN B-PP + market JJ B-NP + capitalization NN I-NP + , , O + " IN B-NP + said VBD B-VP + Eckenfelder CD B-NP + . . O + Some DT B-NP + analysts NNS I-NP + said VBD B-VP + American RB B-NP + Express JJ I-NP + could VBN B-VP + use IN B-PP + capital JJ B-NP + since NN I-NP + it PRP B-NP + plans VBD B-VP + to TO I-VP + expand NNS B-NP + globally JJ B-ADJP + . . O + " NNS B-VP + They NNP B-NP + have VBP B-VP + enormous NNS B-NP + internal JJ B-NP + growth NNS I-NP + plans NNS I-NP + that IN B-PP + takes NNS B-NP + capital JJ B-ADJP + . . O + You NNP B-NP + want NN I-NP + your NN I-NP + stock RB B-ADVP + to TO B-PP + reflect JJ B-NP + realistic NN I-NP + valuations NNS I-NP + to TO B-PP + enhance JJ B-NP + your NN I-NP + ability NN I-NP + to TO B-PP + make JJ B-NP + all DT I-NP + kinds NN I-NP + of IN B-PP + endeavors NNS B-NP + down IN B-PP + the DT B-NP + road NN I-NP + , , O + " IN B-NP + said VBD B-VP + E.F. CD B-NP + Hutton NNP I-NP + Group NNP I-NP + analyst IN B-PP + Michael default B-NP + Lewis default I-NP + . . B-NP + " NN I-NP + They've DT B-NP + outlined VBD B-VP + the DT B-NP + fact NN I-NP + that IN B-SBAR + they're DT B-NP + investing VBG I-NP + heavily NN I-NP + in IN B-PP + the DT B-NP + future NNS I-NP + , , O + which IN B-PP + goes NNS B-NP + heavily NN I-NP + into IN B-PP + the DT B-NP + international JJ I-NP + arena, NN I-NP + " NN I-NP + said VBD B-VP + Lewis CD B-NP + . . O + " default B-VP + . . I-VP + ..That . O + does NNS B-VP + not NN B-NP + preclude NN I-NP + acquisitions NNS I-NP + and CC O + divestitures NNS B-NP + along IN B-PP + the DT B-NP + way NN I-NP + , , O + " IN B-PP + he DT B-NP + said VBD I-NP + . . 
O + Lewis VBZ O + said VBD B-VP + if CD B-NP + American RB I-NP + Express JJ I-NP + reduced VBN I-NP + its NNS I-NP + exposure NN I-NP + to TO B-PP + the DT B-NP + brokerage NN I-NP + business NNS I-NP + by IN B-PP + selling VBG B-VP + part NN B-NP + of IN B-PP + shearson NN B-NP + , , O + its NNS B-NP + stock NN I-NP + might NN I-NP + better IN B-PP + reflect NN B-NP + other IN B-PP + assets NNS B-NP + , , O + such NNS B-NP + as IN B-PP + the DT B-NP + travel NN I-NP + related VBN I-NP + services NNS I-NP + business NNS I-NP + . . O + " NN B-VP + It PRP B-NP + could VBD B-VP + find CD B-NP + its NNS I-NP + true VBD B-VP + water IN B-PP + mark NN B-NP + with IN B-PP + a DT B-NP + lesser JJ I-NP + exposure NN I-NP + to TO B-VP + brokerage VB I-VP + . . O + The DT B-NP + value NN I-NP + of IN B-PP + the DT B-NP + other NN I-NP + components NNP-named I-NP + could VBN B-VP + command VBN I-VP + a DT B-NP + higher NN I-NP + multiple WRB B-ADVP + because NN B-NP + they NN I-NP + constitute VBD B-VP + a DT B-NP + higher NN I-NP + percentage NN I-NP + of IN B-PP + the DT B-NP + total NN I-NP + operating IN B-PP + earnings NNS B-NP + of IN B-PP + the DT B-NP + company NN I-NP + , , O + " IN B-PP + he DT B-NP + said VBD I-NP + . . O + Lewis VBZ O + said VBD B-VP + Shearson CD B-NP + contributed VBN B-VP + 316 CD B-NP + mln NN I-NP + in IN B-PP + after-tax JJ B-NP + operating VBG I-NP + earnings NNS I-NP + , , O + up NNS B-NP + from IN B-PP + about NN B-NP + 200 CD I-NP + mln NN I-NP + dlrs NN I-NP + in IN B-PP + 1985 default B-NP + . . O + Reuter IN B-PP +  CD B-NP + Coleco NNP I-NP + Industries NNP I-NP + Inc NNP I-NP + said VBD B-VP + it PRP B-NP + expects NNS B-VP + to TO B-NP + return JJ I-VP + to TO B-PP + profitability NN B-NP + in IN B-PP + 1987 default B-NP + . . 
O + Earlier NNP B-NP + , , O + Coleco NNP B-NP + reported VBN B-VP + a DT B-NP + net JJ I-NP + loss CD I-NP + of IN B-PP + 111.2 CD B-NP + mln NN I-NP + dlrs NN I-NP + for IN B-PP + the DT B-NP + year NN I-NP + ended VBN B-VP + December IN B-PP + 31 CD B-NP + compared VBN B-VP + to TO B-PP + a DT B-NP + profit NN I-NP + of IN B-PP + 64.2 CD B-NP + mln NN I-NP + dlrs NN I-NP + in IN B-PP + the DT B-NP + year NN I-NP + earlier IN B-PP + . . B-NP + In IN B-PP + a DT B-NP + prepared JJ I-NP + statement NN I-NP + , , O + the DT B-NP + company NN I-NP + said VBD B-VP + the DT B-NP + dramatic NN I-NP + swing IN B-PP + in IN B-PP + operating VBG B-NP + results NNS I-NP + was VBD B-VP + due JJ B-NP + primarily NN I-NP + to TO B-PP + the DT B-NP + steep NN I-NP + decline NN I-NP + in IN B-PP + sales NNS B-NP + of IN B-PP + Cabbage JJ B-NP + Patch NNP I-NP + Kids NNP I-NP + products NNS I-NP + from IN B-PP + 600 CD B-NP + mln NN I-NP + dlrs NN I-NP + to TO B-PP + 230 CD B-NP + mln NN I-NP + dlrs NN I-NP + . . O + Coleco NNP B-NP + said VBD B-VP + it PRP B-NP + changed VBD B-VP + from VBN I-VP + a DT B-NP + single JJ I-NP + product NN I-NP + company NN I-NP + to TO B-PP + a DT B-NP + more JJ I-NP + diversified CD I-NP + organization NN I-NP + through IN B-PP + four JJ B-NP + major NN I-NP + acquisitions NNS I-NP + last JJ B-NP + year NN I-NP + . . O + Products NNS B-NP + from IN B-PP + the DT B-NP + new NN I-NP + acquisitions NNS I-NP + and CC O + other VB B-VP + new RB B-NP + product NN I-NP + introductions NNS I-NP + are VBP B-VP + expected VBN I-VP + to TO I-VP + enable NNS B-NP + it PRP B-NP + to TO B-VP + return JJ B-NP + to TO B-PP + profitability NN B-NP + , , O + it PRP B-NP + said VBD B-VP + . . 
O + At RB O + the DT B-NP + annual JJ I-NP + Toy NNP I-NP + Fair NNP I-NP + earlier IN B-PP + this DT B-NP + month JJ I-NP + , , I-NP + vice JJ I-NP + president NN I-NP + Morton NNP I-NP + Handel NNP I-NP + said VBD B-VP + analysts' CD B-NP + 1987 NN I-NP + projected VBN I-NP + earnings NNS I-NP + of IN B-PP + 90 CD B-NP + cts NNS I-NP + a DT B-NP + share NN I-NP + on IN B-PP + sales NNS B-NP + of IN B-PP + 600 CD B-NP + mln NN I-NP + dlrs NN I-NP + are VBP B-VP + reasonable NN B-NP + . . O + Venezuela NNP-5 B-NP + is VBZ B-VP + seeking VBG I-VP + a DT B-NP + 'constructive JJ I-NP + and CC I-NP + flexible' NNS I-NP + attitude IN B-PP + from JJ B-NP + its NNS I-NP + creditor NN I-NP + banks NNS I-NP + in IN B-PP + current JJ B-NP + talks NNS I-NP + to TO B-PP + reschedule JJ B-NP + 21 CD I-NP + billion NN I-NP + dlrs NN I-NP + in IN B-PP + foreign NN B-NP + debt VBN B-VP + , , O + finance JJ B-NP + minister NN I-NP + manuel JJ I-NP + azpurua NN I-NP + told VBN B-VP + a DT B-NP + press NN I-NP + conference. NN I-NP + He NNP I-NP + declined VBD B-VP + to TO B-PP + comment NN B-NP + on IN B-PP + meetings NNS B-NP + this DT B-NP + week NN I-NP + in IN B-PP + new JJ B-NP + york NN I-NP + between VBN B-VP + public IN B-PP + finances NNS B-NP + director IN B-PP + jorge JJ B-NP + marcano NN I-NP + and CC O + venezuela's VBN B-NP + 13-bank NN I-NP + advisory NN I-NP + committee NNP-named I-NP + except NN I-NP + to TO B-PP + say NN B-NP + , , O + " IN B-PP + they NN B-NP + are VBP B-VP + progressing NNS B-NP + . . O + " NNP B-NP + Azpurua NNP I-NP + said VBD B-VP + venezuela NN B-NP + has NNS I-NP + shown IN B-PP + solidarity JJ B-NP + with IN B-PP + brazil's NNS B-NP + decision VBD B-VP + to TO B-PP + suspend CD B-NP + payments NNS I-NP + , , O + but NNS B-NP + each IN B-PP + country NN B-NP + must VBZ B-VP + negotiate RB I-VP + according VBG I-VP + to TO B-PP + its NNS B-NP + own JJ I-NP + interest NN I-NP + . . 
O + Asked VBD B-VP + to TO B-PP + comment NN B-NP + on IN B-PP + chile's NN B-NP + agreement NN I-NP + with IN B-PP + its NNS B-NP + creditors NN I-NP + today NN I-NP + , , O + which IN B-PP + includes NNS B-NP + an DT B-NP + interest JJ I-NP + rate NN I-NP + margin NN I-NP + of IN B-PP + one CD B-NP + pct NN I-NP + over IN B-PP + libor JJ B-NP + , , O + azpurua NNP B-NP + said VBD B-VP + only NN B-NP + , , O + " IN B-SBAR + that NN B-NP + is VBZ B-VP + good JJ B-NP + news NNS I-NP + . . O + " NNS B-NP + According VBG B-VP + to TO B-PP + banking VBG B-NP + sources NNS I-NP + , , O + the DT B-NP + banks' NN I-NP + latest NN I-NP + offer IN B-PP + to TO B-PP + venezuela CD B-NP + is VBZ B-VP + also RB B-ADVP + a DT B-NP + one JJ I-NP + pct NN I-NP + margin JJ I-NP + as IN B-PP + against NN B-NP + the DT B-NP + last JJ I-NP + february's NN I-NP + 1-1/8 CD B-NP + pct NN I-NP + rescheduling VBG I-NP + accord NNS I-NP + and CC O + the DT B-NP + 7/8 NN I-NP + pct NN I-NP + Venezuela NNP I-NP + wants NNS I-NP + . . O + Azpurua NNP B-NP + said VBD B-VP + four NN B-NP + basic NN I-NP + elements NNS I-NP + are VBP B-VP + being VBN I-VP + negotiated VBN I-VP + with IN B-PP + the DT B-NP + banks NNS I-NP + now: NN I-NP + spread VBD B-VP + reduction VBN I-VP + , , O + deferral JJ B-ADJP + of IN B-PP + principal JJ B-NP + payments NNS I-NP + due NNS I-NP + in IN B-PP + 1987 CD B-NP + and CC I-NP + 1988 CD I-NP + , , O + lenghtening VBG B-VP + the DT B-NP + 12-1/2 CD I-NP + year NN I-NP + repayment NN I-NP + schedule NN I-NP + , , O + and CC O + debt VBN B-VP + capitalization IN B-PP + schemes NNS B-NP + . . O + Azpurua NNP B-NP + said VBD B-VP + the DT B-NP + governent NN I-NP + plans NN I-NP + to TO B-PP + pay NN B-NP + 2.1 CD I-NP + billion NN I-NP + dlrs NN I-NP + in IN B-PP + public NNP B-NP + and CC O + private JJ B-NP + debt NN I-NP + principal NN I-NP + this DT B-NP + year NN I-NP + . . 
O + It PRP B-NP + was VBD B-VP + due VBD I-VP + to TO I-VP + amortize VB I-VP + 1.05 CD B-NP + billion NN I-NP + dlrs NN I-NP + under IN B-PP + the DT B-NP + rescheduling NN I-NP + , , O + and CC O + pay NN B-NP + 420 CD I-NP + mln NN I-NP + dlrs NN I-NP + in IN B-PP + non-restructured JJ B-NP + principal NN I-NP + , , O + both IN B-PP + public JJ B-NP + sector NN I-NP + . . O + He NNP B-NP + said VBD B-VP + venezuela's CD B-NP + original JJ I-NP + proposal NN I-NP + was VBD B-VP + to TO B-PP + pay NN B-NP + no RB I-NP + principal JJ I-NP + on IN B-PP + restructured JJ B-NP + debt NN I-NP + this DT B-NP + year NN I-NP + , , O + but IN B-NP + is VBZ B-VP + now RB I-VP + insisting VBG I-VP + that IN B-SBAR + if NNP B-NP + it PRP B-NP + makes VBZ B-VP + payments NNS B-NP + they IN B-PP + be VB B-NP + compensated VBN B-VP + by IN B-PP + new JJ B-NP + bank NN I-NP + loans NNS I-NP + . . O + The DT B-NP + banking VBG I-NP + sources NNS I-NP + said VBD B-VP + the DT B-NP + committee NN I-NP + has NNS B-VP + been VBN I-VP + prepared VBN I-VP + to TO I-VP + lower VB I-VP + amortizations VBN I-VP + to TO B-PP + around IN B-NP + 400 CD I-NP + mln NN I-NP + dlrs NN I-NP + this IN B-PP + year NN B-NP + , , O + but IN B-PP + that NN B-NP + no RB B-NP + direct JJ I-NP + commitment NN I-NP + was VBD B-VP + likely JJ B-ADJP + on IN B-PP + new JJ B-NP + loans NNS I-NP + . . O + " CD B-NP + debtors NNS I-NP + and CC I-NP + bank NNS I-NP + creditors NN I-NP + have VBP B-VP + a DT B-NP + joint JJ I-NP + responsibility NN I-NP + and CC O + there DT B-NP + will MD B-VP + be VB I-VP + no RB I-VP + lasting VBG I-VP + solution NN B-NP + unless NNS I-NP + a DT B-NP + positive JJ I-NP + flow NN I-NP + of IN B-PP + financing VBG B-NP + is VBZ B-VP + guaranteed NNS B-NP + , , O + " NNS B-NP + azpurua DT B-NP + said VBD B-VP + . . 
O + However IN B-ADVP + , , O + he NNS B-NP + appeared VBD B-VP + to TO I-VP + discard VB I-VP + earlier JJ B-NP + venezuelan NN I-NP + proposals NN I-NP + for IN B-PP + a DT B-NP + direct NN I-NP + link NN I-NP + between VBN B-VP + oil JJ B-NP + income NN I-NP + and CC O + debt VBN B-NP + payments NNS I-NP + , , O + "because NNS B-NP + circumstances NNS I-NP + change VBD B-VP + too RB B-ADJP + quickly JJ I-ADJP + . . O + " NNS B-VP + At RB B-ADVP + the DT B-NP + same JJ I-NP + time NN I-NP + , , O + he NN B-NP + said VBD B-VP + the DT B-NP + government NN I-NP + is VBZ B-VP + presently RB I-VP + studying VBG I-VP + possible JJ B-NP + mechanisms NNS I-NP + for IN B-PP + capitlizing VBG B-VP + public NN B-NP + and CC O + private RB B-NP + sector JJ I-NP + foreign NNS I-NP + debt VBD B-VP + , , O + based NNS B-NP + on IN B-PP + experience NN B-NP + in IN B-PP + other JJ B-NP + countries NNS I-NP + . . O + The DT B-NP + rules NN I-NP + would MD B-VP + be VB I-VP + published VBN I-VP + by IN B-PP + the DT B-NP + finance JJ I-NP + ministry NN I-NP + and CC O + the DT B-NP + central JJ I-NP + bank NN I-NP + . . O + Thomson NNP B-NP + McKinnon NNP I-NP + Mortgage NNP I-NP + Assets NNS I-NP + Corp NNP I-NP + , , O + a DT B-NP + unit NN I-NP + of IN B-PP + Thomson NNP B-NP + McKinnon NNP I-NP + Inc NNP I-NP + , , O + is VBZ B-VP + offering IN B-NP + 100 CD I-NP + mln NN I-NP + dlrs NN I-NP + of IN B-PP + collateralized VBN B-NP + mortgage NN I-NP + obligations NNS I-NP + in IN B-PP + three DT B-NP + tranches NNS I-NP + that IN B-PP + include JJ B-NP + floating NN I-NP + rate NN I-NP + and CC O + inverse RB B-VP + floating VBG I-VP + rate JJ B-NP + CMOS NNP I-NP + . . O + The DT B-NP + floating VBG I-NP + rate NN I-NP + class NN I-NP + amounts NNS I-NP + to TO B-PP + 60 CD B-NP + mln NN I-NP + dlrs NN I-NP + . . O + It PRP B-NP + has VBZ B-VP + an DT B-NP + average JJ I-NP + life NN I-NP + of IN B-PP + 7.11 CD B-NP + years NNS I-NP + and CC O + matures NNS B-NP + 2018. 
CD B-PP + The DT B-NP + CMOs JJ I-NP + have NN I-NP + an DT B-NP + initial JJ I-NP + coupon NN I-NP + of IN B-PP + 7.0375 CD B-NP + pct NN I-NP + , , O + which IN B-NP + will MD B-VP + be VB I-VP + reset NN B-NP + 60 CD I-NP + basis NNS I-NP + points NNS I-NP + above VBP B-VP + LIBOR VBN I-VP + , , O + said VBD B-VP + sole CD B-NP + manager NN I-NP + Thomson NNP I-NP + McKinnon NNP I-NP + . . O + The DT B-NP + inverse JJ I-NP + floater NN I-NP + totals IN B-PP + 4.8 CD B-NP + mln NN I-NP + dlrs NN I-NP + . . O + It PRP B-NP + has VBZ B-VP + an DT B-NP + average JJ I-NP + life NN I-NP + of IN B-PP + 13.49 CD B-NP + years NNS I-NP + and CC O + matures NNS B-NP + 2018. CD B-NP + These NNP I-NP + CMOs NNS I-NP + were VBD B-VP + given JJ I-VP + an DT B-NP + initial JJ I-NP + coupon NN I-NP + of IN B-PP + 11-1/2 CD B-NP + pct NN I-NP + and CC O + priced VBN B-VP + at IN B-PP + 104.40. CD B-NP + Subsequent JJ I-NP + rates NNS I-NP + on IN B-PP + the DT B-NP + inverse NN I-NP + floater IN B-PP + will DT B-NP + equal JJ I-NP + 11-1/2 CD I-NP + pct NN I-NP + minus VBZ B-VP + the DT B-NP + product NN I-NP + of IN B-PP + three DT B-NP + times NNS I-NP + (LIBOR NNP I-NP + minus NNS I-NP + 6-1/2 CD B-NP + pct) NNS I-NP + . . O + A RB O + Thomson NNP B-NP + officer IN I-PRT + explained VBN B-VP + that IN B-PP + the DT B-NP + coupon NN I-NP + of IN B-PP + the DT B-NP + inverse NN I-NP + floating IN B-PP + rate JJ B-NP + tranche NN I-NP + would VBN B-VP + increase IN B-PP + if NNP B-NP + LIBOR default I-NP + declined VBD B-VP + . . O + " NN B-VP + The DT B-NP + yield JJ I-NP + floats NN I-NP + opposite NN I-NP + of IN B-PP + LIBOR NNP B-NP + , , O + " CD B-NP + he NN I-NP + said VBD B-VP + . . O + The DT B-NP + fixed-rate JJ I-NP + tranche NN I-NP + totals NNS I-NP + 35.2 IN B-PP + mln NN B-NP + dlrs NN I-NP + . . O + It PRP B-NP + has VBZ B-VP + an DT B-NP + average JJ I-NP + life NN I-NP + of IN B-PP + 3.5 CD B-NP + years NNS I-NP + and CC O + matures NNS B-NP + 2016. 
CD B-PP + The DT B-NP + CMOs JJ I-NP + were NN I-NP + assigned VBN B-VP + a DT B-NP + 7.65 NN I-NP + pct NN I-NP + coupon NN I-NP + and CC O + par RB B-VP + pricing VBG I-VP + . . B-PP + The DT B-NP + issue NN I-NP + is VBZ B-VP + rated VBN I-VP + AAA RB B-ADVP + by IN B-PP + Standard NNP B-NP + and CC I-NP + Poor's NNP I-NP + and CC O + secured JJ B-ADVP + by IN B-SBAR + Federal JJ B-NP + Home . I-NP + Loan NNP I-NP + Mortgage NNP I-NP + Corp NNP I-NP + , , O + Freddie NNP B-NP + Mac NNP I-NP + , , O + certificates NNS B-NP + . . O + OPEC NNP B-NP + may NN I-NP + be VB B-VP + forced VBD I-VP + to TO B-PP + meet NN B-NP + before NN I-NP + a DT B-NP + scheduled JJ I-NP + June CD I-NP + session NN I-NP + to TO B-PP + readdress JJ B-NP + its NNS I-NP + production NN I-NP + cutting VBG I-NP + agreement NN I-NP + if IN B-SBAR + the DT B-NP + organization NN I-NP + wants NNS B-VP + to TO I-VP + halt VB I-VP + the DT B-NP + current NN I-NP + slide NN I-NP + in IN B-PP + oil JJ B-NP + prices NNS I-NP + , , O + oil JJ B-NP + industry NN I-NP + analysts NNS I-NP + said VBD B-VP + . . O + " NN B-VP + The DT B-NP + movement NN I-NP + to TO B-VP + higher VB I-VP + oil JJ B-NP + prices NNS I-NP + was VBD B-VP + never IN B-ADVP + to TO B-VP + be VB I-VP + as IN B-PP + easy NN B-NP + as IN B-PP + OPEC NNP B-NP + thought IN B-PP + . . B-NP + They NNP I-NP + may NN I-NP + need VBD B-VP + an DT B-NP + emergency NN I-NP + meeting VBG B-VP + to TO B-PP + sort NN B-NP + out IN B-PP + the DT B-NP + problems NN I-NP + , , O + " IN B-NP + said VBD B-VP + Daniel CD B-NP + Yergin NNP I-NP + , , O + director IN B-PP + of IN B-PP + Cambridge JJ B-NP + Energy NNP I-NP + Research NNP I-NP + Associates NNS I-NP + , , O + CERA NNP B-NP + . . 
O + Analysts NNS B-NP + and CC O + oil JJ B-NP + industry NN I-NP + sources NNS I-NP + said VBD B-VP + the DT B-NP + problem NN I-NP + OPEC IN B-PP + faces NNS B-NP + is VBZ B-VP + excess NNS B-NP + oil JJ B-ADJP + supply RB B-ADVP + in IN B-PP + world JJ B-NP + oil NNS I-NP + markets NNS I-NP + . . O + " NN B-NP + OPEC's NNS I-NP + problem NN I-NP + is VBZ B-VP + not RB O + a DT B-NP + price NN I-NP + problem NN I-NP + but NN B-VP + a DT B-NP + production NN I-NP + issue NNS I-NP + and CC O + must JJ B-VP + be VB I-VP + addressed VBN I-VP + in IN B-PP + that DT B-NP + way NN I-NP + , , O + " IN B-NP + said VBD B-VP + Paul default B-NP + Mlotok NNP I-NP + , , O + oil JJ B-NP + analyst NN I-NP + with IN B-PP + Salomon NNP B-NP + Brothers NNS I-NP + Inc NNP I-NP + . . O + He JJ O + said VBD B-VP + the DT B-NP + market's NN I-NP + earlier IN B-PP + optimism JJ B-NP + about NN I-NP + OPEC NNS I-NP + and CC O + its NNS B-NP + ability NN I-NP + to TO B-PP + keep CD B-NP + production NN I-NP + under IN B-PP + control JJ B-NP + have NN I-NP + given NNS I-NP + way NN B-ADJP + to TO B-PP + a DT B-NP + pessimistic NN I-NP + outlook NN I-NP + that IN B-PP + the DT B-NP + organization NN I-NP + must NN I-NP + address NNS I-NP + soon IN B-PP + if NNP B-NP + it PRP B-NP + wishes VBD B-VP + to TO B-PP + regain JJ B-NP + the DT I-NP + initiative JJ I-NP + in IN B-PP + oil JJ B-NP + prices NNS I-NP + . . O + But JJ B-NP + some NN I-NP + other IN B-PP + analysts NNS B-NP + were VBD B-VP + uncertain RB B-ADJP + that IN B-PP + even VBN B-NP + an DT B-NP + emergency NN I-NP + meeting VBG B-VP + would JJ B-NP + address NNS I-NP + the DT B-NP + problem NN I-NP + of IN B-PP + OPEC NNP B-NP + production NN I-NP + above IN B-PP + the DT B-NP + 15.8 CD I-NP + mln NN I-NP + bpd NNS I-NP + quota IN B-PP + set NN B-NP + last JJ B-NP + December NNP I-NP + . . 
O + " IN B-SBAR + OPEC NNP B-NP + has NNS I-NP + to TO B-PP + learn JJ B-NP + that NN I-NP + in IN B-PP + a DT B-NP + buyers NNS I-NP + market NN I-NP + you IN B-PP + cannot NN B-NP + have VBP B-VP + deemed VBN I-VP + quotas NNS B-NP + , , O + fixed NNS B-NP + prices NNS I-NP + and CC O + set NN B-NP + differentials NNS I-NP + , , O + " NNS B-NP + said VBD B-VP + the DT B-NP + regional JJ I-NP + manager NN I-NP + for IN B-PP + one NN B-NP + of IN B-PP + the DT B-NP + major NN I-NP + oil NN I-NP + companies NNS I-NP + who IN B-PP + spoke NN B-NP + on IN B-PP + condition NN B-NP + that IN B-SBAR + he DT B-NP + not NN I-NP + be VB B-VP + named RB I-VP + . . I-VP + " NN I-VP + The DT B-NP + market NN I-NP + is VBZ B-VP + now RB I-VP + trying VBG I-VP + to TO I-VP + teach IN B-PP + them DT B-NP + that NN I-NP + lesson IN B-PP + again NN B-NP + , , O + " IN B-PP + he DT B-NP + added VBD I-NP + . . O + David VBD B-VP + T NNP B-NP + . . I-NP + Mizrahi NNP I-NP + , , O + editor CD B-NP + of IN B-PP + Mideast JJ B-NP + reports NNS I-NP + , , O + expects NNS B-NP + OPEC VBD B-VP + to TO B-PP + meet JJ B-NP + before NN I-NP + June JJ I-NP + , , O + although IN B-SBAR + not JJ B-NP + immediately NN I-NP + . . O + However NNP B-NP + , , O + he CD B-NP + is VBZ B-VP + not RB O + optimistic JJ B-NP + that NN I-NP + OPEC default I-NP + can NN I-NP + address NNS I-NP + its NNS B-NP + principal JJ I-NP + problems NNS I-NP + . . O + " IN O + They NNP B-NP + will MD B-VP + not JJ B-NP + meet NN I-NP + now NN I-NP + as IN B-PP + they NN B-NP + try NN I-NP + to TO B-VP + take VB I-VP + advantage NN B-NP + of IN B-PP + the DT B-NP + winter NN I-NP + demand VBN B-VP + to TO B-PP + sell JJ B-NP + their NN I-NP + oil NNS I-NP + , , O + but NNS B-NP + in IN B-PP + late JJ B-NP + March NNP I-NP + and CC O + April default B-NP + when JJ I-NP + demand VBN I-NP + slackens NNS I-NP + , , O + " NNS B-NP + Mizrahi NNP I-NP + said VBD B-VP + . . 
O + But JJ B-NP + Mizrahi NNP I-NP + said VBD B-VP + that NN B-SBAR + OPEC default B-NP + is VBZ B-VP + unlikely RB B-ADJP + to TO B-VP + do VB I-VP + anything VBG I-VP + more JJ B-NP + than NN I-NP + reiterate NN I-NP + its NNS I-NP + agreement NN I-NP + to TO B-PP + keep CD B-NP + output NN I-NP + at IN B-PP + 15.8 CD B-NP + mln NN I-NP + bpd NNS I-NP + . . O + " NNP B-NP + Analysts NNS I-NP + said VBD B-VP + that IN B-SBAR + the DT B-NP + next NN I-NP + two NN I-NP + months VBZ B-VP + will RB I-VP + be VB I-VP + critical NN B-NP + for IN B-PP + OPEC's NNP B-NP + ability NN I-NP + to TO B-VP + hold VB I-VP + together NN B-NP + prices NNS I-NP + and CC O + output JJ B-NP + . . O + " IN B-PP + OPEC NNP B-NP + must JJ I-NP + hold VBD B-VP + to TO B-PP + its NNS B-NP + pact NN I-NP + for IN B-PP + the DT B-NP + next NN I-NP + six IN B-PP + to TO B-PP + eight JJ B-NP + weeks NNS I-NP + since IN B-SBAR + buyers NNS B-NP + will MD B-VP + come VBN I-VP + back NN B-NP + into IN B-PP + the DT B-NP + market NN I-NP + then VBN B-VP + , , O + " NNP B-NP + said VBD B-VP + Dillard CD B-NP + Spriggs NNS I-NP + of IN B-PP + Petroleum default B-NP + Analysis RB B-VP + Ltd VBN I-VP + in IN B-PP + New NNP B-NP + York NNP I-NP + . . O + But JJ B-NP + Bijan NNP I-NP + Moussavar-Rahmani NNP I-NP + of IN B-PP + Harvard default B-NP + University's NNS I-NP + Energy NNP I-NP + and CC I-NP + Environment JJ I-NP + Policy NNP I-NP + Center NNP I-NP + said VBD B-VP + that NN B-SBAR + the DT B-NP + demand VBN B-VP + for IN B-PP + OPEC NNP B-NP + oil NN I-NP + has NNS I-NP + been VBN B-VP + rising VBG I-VP + through IN B-PP + the DT B-NP + first JJ I-NP + quarter NN I-NP + and CC O + this DT B-NP + may NN I-NP + have VBP B-VP + prompted VBN I-VP + excesses NNS B-NP + in IN B-PP + its NNS B-NP + production NN I-NP + . . 
O + " CD B-NP + Demand CD I-NP + for IN B-PP + their NN B-NP + (OPEC) default I-NP + oil JJ I-NP + is VBZ B-VP + clearly RB B-ADJP + above JJ I-ADJP + 15.8 CD B-NP + mln NN I-NP + bpd NNS I-NP + and CC O + is VBZ B-VP + probably RB B-NP + closer NN I-NP + to TO B-PP + 17 CD B-NP + mln NN I-NP + bpd NNS I-NP + or IN B-PP + higher NN B-NP + now RB I-NP + so JJ I-NP + what NN I-NP + we NNS I-NP + are VBP B-VP + seeing IN O + characterized VBN B-VP + as IN B-PP + cheating VBG B-NP + is VBZ B-VP + OPEC NNP B-NP + meeting VBG I-NP + this DT B-NP + demand VBD B-VP + through IN B-PP + current NN B-NP + production NN I-NP + , , O + " IN B-PP + he DT B-NP + told JJ I-NP + Reuters NNS I-NP + in IN B-PP + a DT B-NP + telephone NN I-NP + interview WRB B-ADVP + . . O + BankAmerica NNP B-NP + Corp NNP I-NP + is VBZ B-VP + not RB O + under IN B-PP + pressure NN B-NP + to TO B-PP + act IN B-NP + quickly JJ I-NP + on IN B-PP + its NNS B-NP + proposed VBD B-VP + equity JJ I-VP + offering VBG I-VP + and CC I-VP + would VBN I-VP + do IN B-PP + well JJ B-NP + to TO B-VP + delay NN I-VP + it PRP B-NP + because NN B-NP + of IN B-PP + the DT B-NP + stock's NN I-NP + recent NN I-NP + poor NNS I-NP + performance NNS I-NP + , , O + banking NNS B-NP + analysts NNS I-NP + said VBD B-VP + . . B-NP + Some JJ I-NP + analysts NNS I-NP + said VBD B-VP + they IN B-PP + have NN B-NP + recommended VBN B-VP + BankAmerica NNP B-NP + delay NN I-NP + its NNS I-NP + up VBD B-VP + to TO B-PP + one-billion-dlr CD B-NP + equity NN I-NP + offering VBG I-NP + , , O + which IN B-SBAR + has NNS B-NP + yet VBD B-VP + to TO I-VP + be VB I-VP + approved VBN I-VP + by IN B-PP + the DT B-NP + Securities NNS I-NP + and CC I-NP + Exchange JJ I-NP + Commission NNP I-NP + . . 
I-NP + BankAmerica NNP I-NP + stock NN I-NP + fell NNS I-NP + this IN B-PP + week NN B-NP + , , O + along IN B-PP + with IN B-PP + other NN B-NP + banking VBG I-NP + issues NNS I-NP + , , O + on IN B-PP + the DT B-NP + news NN I-NP + that IN B-PP + Brazil JJ B-NP + has NNS I-NP + suspended VBD B-VP + interest IN B-PP + payments NNS B-NP + on IN B-PP + a DT B-NP + large JJ I-NP + portion NN I-NP + of IN B-PP + its NNS B-NP + foreign IN B-PP + debt NN B-NP + . . O + The DT B-NP + stock NN I-NP + traded VBN B-VP + around IN B-PP + 12 CD B-NP + , , O + down IN B-PP + 1/8 NN B-NP + , , O + this IN B-PP + afternoon NN B-NP + , , O + after IN B-PP + falling VBG B-VP + to TO B-PP + 11-1/2 CD B-NP + earlier NN I-NP + this IN B-PP + week NN B-NP + on IN B-PP + the DT B-NP + news NN I-NP + . . O + Banking NNP B-NP + analysts NNS I-NP + said VBD B-VP + that IN B-SBAR + with IN B-PP + the DT B-NP + immediate JJ I-NP + threat NN I-NP + of IN B-PP + the DT B-NP + First JJ I-NP + Interstate NNP I-NP + Bancorp NNP I-NP + NNP I-NP + takeover IN B-PP + bid NN B-NP + gone NN I-NP + , , O + BankAmerica NNP B-NP + is VBZ B-VP + under IN B-PP + no NN B-NP + pressure NN I-NP + to TO B-PP + sell JJ B-NP + the DT I-NP + securities NN I-NP + into IN B-PP + a DT B-NP + market NN I-NP + that IN B-NP + will MD B-VP + be VB I-VP + nervous RB B-ADJP + on IN B-PP + bank NN B-NP + stocks NNS I-NP + in IN B-PP + the DT B-NP + near JJ I-NP + term NN I-NP + . . O + BankAmerica NNP O + filed VBD B-VP + the DT B-NP + offer NN I-NP + on IN B-PP + January NNP B-NP + 26. CD I-NP + It PRP B-NP + was VBD B-VP + seen JJ B-ADJP + as IN B-PP + one NN B-NP + of IN B-PP + the DT B-NP + major NN I-NP + factors NNS I-NP + leading VBG B-VP + the DT B-NP + First JJ I-NP + Interstate NNP I-NP + withdrawing VBG B-VP + its NNS B-NP + takeover IN B-PP + bid VBN B-NP + on IN B-PP + February NNP B-NP + 9. 
CD I-NP + A RB I-NP + BankAmerica NNP I-NP + spokesman NN I-NP + said VBD B-VP + SEC CD B-NP + approval JJ I-NP + is VBZ B-VP + taking IN B-PP + longer JJ B-NP + than NN I-NP + expected VBN I-NP + and CC I-NP + market JJ I-NP + conditions NN I-NP + must JJ I-NP + now RB B-ADVP + be VB B-VP + re-evaluated VBN I-VP + . . O + " IN B-PP + The DT B-NP + circumstances NNS I-NP + at IN B-PP + the DT B-NP + time NN I-NP + will MD B-VP + determine NN I-VP + what IN B-NP + we JJ B-NP + do NN I-NP + , , O + " IN B-NP + said VBD B-VP + Arthur RB B-ADJP + Miller JJ I-ADJP + , , O + BankAmerica's NNP B-NP + Vice JJ I-NP + President NN I-NP + for IN B-PP + Financial JJ B-NP + Communications NNP I-NP + , , O + when JJ B-NP + asked VBD B-VP + if NNP B-NP + BankAmerica NNP I-NP + would VBD B-VP + proceed NN B-NP + with IN B-PP + the DT B-NP + offer NN I-NP + immediately NN I-NP + after IN B-PP + it PRP B-NP + receives NNS B-VP + SEC NNP B-NP + approval JJ I-NP + . . O + " IN B-PP + I'd NNP B-NP + put NN B-VP + it PRP B-NP + off NNP B-NP + as IN B-PP + long NN B-NP + as IN B-PP + they NN B-NP + conceivably NN I-NP + could VBN B-VP + , , O + " NNP B-NP + said VBD B-VP + Lawrence CD B-NP + Cohn NNP I-NP + , , I-NP + analyst JJ I-NP + with IN B-PP + Merrill default B-NP + Lynch NNP I-NP + , , I-NP + Pierce NNP I-NP + , , I-NP + Fenner NNP I-NP + and CC I-NP + Smith NNP I-NP + . . O + Cohn NNP B-NP + said VBD B-VP + the DT B-NP + longer NN I-NP + BankAmerica NNP I-NP + waits NNS I-NP + , , O + the DT B-NP + longer JJR I-NP + they NN I-NP + have VBP B-VP + to TO I-VP + show WRB I-VP + the DT B-NP + market NN I-NP + an DT B-NP + improved VBD B-VP + financial JJ B-NP + outlook NN I-NP + . . 
O + Although RB O + BankAmerica NNP B-NP + has NNS I-NP + yet VBD B-VP + to TO I-VP + specify VB I-VP + the DT B-NP + types NNS I-NP + of IN B-PP + equities NNS B-NP + it PRP B-NP + would VBD B-VP + offer IN B-ADVP + , , O + most JJ B-NP + analysts NN I-NP + believed VBN B-VP + a DT B-NP + convertible NN I-NP + preferred VBN I-NP + stock NN I-NP + would VBD B-VP + encompass VBN I-VP + at IN B-PP + least JJ B-NP + part NN I-NP + of IN B-PP + it PRP B-NP + . . O + Such NNP O + an DT B-NP + offering VBG I-NP + at IN B-PP + a DT B-NP + depressed JJ I-NP + stock NN I-NP + price NN I-NP + would VBN B-VP + mean VBN I-VP + a DT B-NP + lower JJ I-NP + conversion NN I-NP + price NN I-NP + and CC O + more RB B-VP + dilution VBN I-VP + to TO B-PP + BankAmerica NNP B-NP + stock NN I-NP + holders NNS I-NP + , , O + noted JJ B-NP + Daniel . I-NP + Williams NNS I-NP + , , O + analyst JJ B-ADJP + with IN B-PP + Sutro NNP B-NP + Group default I-NP + . . O + Several JJ B-NP + analysts NNS I-NP + said VBD B-VP + that IN B-SBAR + while NN B-NP + they NN I-NP + believe VB B-VP + the DT B-NP + Brazilian JJ I-NP + debt NN I-NP + problem NN I-NP + will RB B-VP + continue VBD I-VP + to TO I-VP + hang NNS B-NP + over IN B-PP + the DT B-NP + banking VBG I-NP + industry NN I-NP + through IN B-PP + the DT B-NP + quarter NN I-NP + , , O + the DT B-NP + initial JJ I-NP + shock NN I-NP + reaction NN I-NP + is VBZ B-VP + likely RB B-ADVP + to TO B-PP + ease NNS B-NP + over IN B-PP + the DT B-NP + coming VBG I-NP + weeks NNS I-NP + . . 
O + Nevertheless NNP B-NP + , , O + BankAmerica, NNP B-NP + which IN B-PP + holds NN B-NP + about IN B-PP + 2.70 CD B-NP + billion NN I-NP + dlrs NN I-NP + in IN B-PP + Brazilian JJ B-NP + loans NNS I-NP + , , O + stands NNS B-NP + to TO B-PP + lose JJ B-NP + 15-20 CD I-NP + mln NN I-NP + dlrs NN I-NP + if IN B-PP + the DT B-NP + interest NN I-NP + rate NN I-NP + is VBZ B-VP + reduced VBN I-VP + on IN B-PP + the DT B-NP + debt NN I-NP + , , O + and CC O + as IN B-PP + much NN B-NP + as IN B-PP + 200 CD B-NP + mln NN I-NP + dlrs NN I-NP + if NNP I-NP + Brazil NNP I-NP + pays NNS B-VP + no RB B-NP + interest NN I-NP + for IN B-PP + a DT B-NP + year NN I-NP + , , O + said VBD B-VP + Joseph NNP B-NP + Arsenio RB I-NP + , , O + analyst JJ B-ADJP + with IN B-PP + Birr NNP B-NP + , , I-NP + Wilson NNP I-NP + and CC I-NP + Co JJ I-NP + . . O + He DT B-NP + noted VBN B-VP + , , O + however IN B-ADVP + , , O + that IN B-SBAR + any NN B-NP + potential JJ I-NP + losses NNS I-NP + would VBD B-VP + not RB B-NP + show JJ I-NP + up NNS I-NP + in IN B-PP + the DT B-NP + current NN I-NP + quarter IN B-PP + . . O + The DT B-NP + Federal JJ I-NP + Deposit NN I-NP + Insurance IN B-PP + Corp NNP B-NP + (FDIC) NNP I-NP + said VBD B-VP + three NN B-NP + troubled VBD I-NP + banks NNS I-NP + in IN B-PP + Texas NNP B-NP + and CC I-NP + Louisiana NNP I-NP + were VBD B-VP + merged VBN I-VP + with IN B-PP + healthy NN B-NP + financial JJ I-NP + institutions NNS I-NP + . . O + The DT B-NP + FDIC NNP I-NP + said VBD B-VP + it PRP B-NP + subsidized VBD B-VP + the DT B-NP + merger NN I-NP + of IN B-PP + Central JJ B-NP + Bank NNP I-NP + and CC I-NP + Trust JJ I-NP + Co NNP I-NP + , , I-NP + Glenmora NNP I-NP + , , I-NP + La. NNP I-NP + , , I-NP + with IN B-PP + the DT B-NP + healthy NN I-NP + Peoples NNS I-NP + Bank NNP I-NP + and CC I-NP + Trust JJ I-NP + Co NNP I-NP + , , O + Natchitoches NNS B-NP + , , O + La. 
NNP B-NP + , , O + after IN B-PP + state NN B-NP + regulators VBN B-VP + notified VBN I-VP + it PRP B-NP + that IN B-PP + Central JJ B-NP + was NNS I-NP + in IN B-PP + danger NN B-NP + of IN B-PP + failing VBG B-NP + . . O + Central JJ O + had VBD B-VP + assets NNS B-NP + of IN B-PP + 28.3 CD B-NP + mln NN I-NP + dlrs NN I-NP + . . O + The DT B-NP + FDIC JJ I-NP + said VBD B-VP + the DT B-NP + deposits NN I-NP + of IN B-PP + the DT B-NP + failed NN I-NP + Farmers NNS I-NP + State VBD B-VP + Bank NNP B-NP + , , O + Hart NNP B-NP + , , O + Tex NNP B-NP + . . O + , , O + were VBD B-VP + assumed VBN I-VP + by IN B-PP + Hale NNP B-NP + County NNP I-NP + State NNP I-NP + Bank NNP I-NP + , , O + Plainview NNP B-NP + , , O + Tex NNP B-NP + . . O + Farmers NNS B-NP + , , O + with IN B-PP + 9.6 CD B-NP + mln NN I-NP + dlrs NN I-NP + in IN B-PP + assets NN B-NP + , , O + was VBD B-VP + closed VBN I-VP + by IN B-PP + Texas NNP B-NP + bank NN I-NP + regulators NNS I-NP + . . O + The DT B-NP + deposits NN I-NP + of IN B-PP + the DT B-NP + failed NN I-NP + First IN B-PP + National JJ B-NP + Bank NNP I-NP + of IN B-PP + Crosby NNP B-NP + , , O + Crosby NNP B-NP + , , O + Tex NNP B-NP + . . O + , , O + with IN B-PP + total JJ B-NP + assets NNS I-NP + of IN B-PP + 8.2 CD B-NP + mln NN I-NP + dlrs NN I-NP + , , O + were VBD B-VP + assumed VBN I-VP + by IN B-PP + Central JJ B-NP + Bancshares NNS I-NP + of IN B-PP + the DT B-NP + South NNP I-NP + Inc NNP I-NP + , , O + Birmingham NNP B-NP + , , O + Ala. NNP B-NP + , , O + after IN B-PP + First JJ B-NP + National JJ I-NP + was VBD I-NP + closed VBN B-VP + by IN B-PP + federal JJ B-NP + bank NN I-NP + regulators NNS I-NP + , , O + the DT B-NP + FDIC NNP I-NP + said VBD B-VP + . . 
O + Brazil's JJ O + 14-bank NN B-NP + advisory NN I-NP + committee NNP-named I-NP + expressed VBN B-VP + " IN B-PP + grave JJ B-NP + concern NN I-NP + " NN I-NP + to TO B-PP + chief NNP B-NP + debt VBD B-VP + negotiator NN B-NP + Antonio RB B-ADVP + Padua NNP-5 B-NP + de VBD B-VP + Seixas NNS B-NP + over IN B-PP + the DT B-NP + country's NN I-NP + suspension NN I-NP + of IN B-PP + interest JJ B-NP + payments NNS I-NP + , , O + according IN B-PP + to TO B-PP + a DT B-NP + telex NN I-NP + from IN B-PP + committee NN B-NP + chairman NN I-NP + Citibank NNP I-NP + to TO B-PP + creditor NN B-NP + banks NNS I-NP + worldwide VBD B-VP + . . B-NP + Bankers NNS I-NP + said VBD B-VP + the DT B-NP + diplomatic NN I-NP + phrase NN I-NP + belied VBN B-VP + the DT B-NP + deep NN I-NP + anger IN B-PP + and CC O + frustration VBN B-VP + on IN B-PP + the DT B-NP + committee NN I-NP + over IN B-PP + Brazil's NNP B-NP + unilateral JJ I-NP + move NN I-NP + last JJ B-NP + Friday NNP I-NP + and CC O + its NNS B-NP + subsequent JJ I-NP + freeze NNS I-NP + on IN B-PP + some DT B-NP + 15 CD I-NP + billion NN I-NP + dlrs NN I-NP + of IN B-PP + short-term NN B-NP + trade NN I-NP + and CC O + interbank RB B-NP + lines NNS I-NP + . . O + Seixas NNS B-NP + , , O + director NNS B-NP + of IN B-PP + the DT B-NP + Brazilian JJ I-NP + central NN I-NP + bank's NNS I-NP + foreign IN B-PP + debt NN B-NP + department NN I-NP + , , O + met IN B-PP + the DT B-NP + full JJ I-NP + panel NN I-NP + on IN B-PP + Tuesday NNP B-NP + and CC I-NP + Wednesday NNP B-NP + . . O + Seixas NNS B-NP + , , O + who NNS B-NP + met NN I-NP + again IN B-PP + this DT B-NP + morning VBG I-NP + with IN B-PP + senior NNP B-NP + Citibank NNP I-NP + executive JJ I-NP + William . 
I-NP + Rhodes NNS I-NP + and CC I-NP + representatives NNS I-NP + from IN B-PP + committee NN B-NP + vice-chairmen VBN I-NP + Morgan NNP I-NP + Guaranty NNP I-NP + Trust JJ I-NP + Co NNP I-NP + and CC I-NP + Lloyds NNP I-NP + Bank NNP I-NP + Plc NNP I-NP + , , O + told JJ B-NP + the DT I-NP + banks NNS I-NP + that IN B-PP + the DT B-NP + government NN I-NP + was VBD B-VP + preparing VBG I-VP + a DT B-NP + telex NN I-NP + to TO B-PP + explain NNS B-NP + and CC O + clarify VB B-VP + the DT B-NP + freeze NN I-NP + on IN B-PP + short-term JJ B-NP + credits NN I-NP + . . O + The DT B-NP + telex NN I-NP + could VBN B-VP + be VB I-VP + sent NN B-NP + to TO B-PP + creditors NN B-NP + as IN B-PP + early NN B-NP + as IN B-PP + today NN B-NP + , , O + bankers NNS B-NP + said VBD B-VP + . . O + Despite JJ O + the DT B-NP + rising VBG I-NP + tempers NNS I-NP + , , O + bankers NNS B-NP + said VBD B-VP + there EX B-NP + are VBP B-VP + no RB I-VP + plans VBN I-VP + for IN B-PP + Brazilian NNP B-NP + finance NN I-NP + minister NN I-NP + Dilson NNP I-NP + Funaro NNP I-NP + to TO B-PP + meet JJ B-NP + commercial NN I-NP + bankers NNS I-NP + during IN B-PP + his NNS B-NP + trip VBD B-VP + to TO B-PP + Washington NNP B-NP + on IN B-PP + Friday NNP B-NP + and CC O + Saturday NNP B-NP + . . I-NP + Funaro NNP I-NP + will MD B-VP + be VB I-VP + explaining VBG I-VP + Brazil's NNS B-NP + actions VBD B-VP + to TO B-PP + U.S. NNP B-NP + Treasury NNP I-NP + Secretary NNP I-NP + James NNP I-NP + Baker NNP I-NP + , , O + Federal JJ B-NP + Reserve . I-NP + Board NNP I-NP + chairman NN I-NP + Paul default I-NP + Volcker NNP I-NP + and CC I-NP + International JJ I-NP + Monetary NNP I-NP + Fund NNP I-NP + managing VBG B-VP + director IN B-PP + Michel default B-NP + Camdessus NNP I-NP + before NN I-NP + travelling IN B-NP + to TO B-VP + Europe VB I-VP + at IN B-PP + the DT B-NP + weekend JJ I-NP + . . 
O diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/ner/TrainerParams.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/ner/TrainerParams.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/ner/TrainerParams.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/ner/TrainerParams.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,21 ---- + # Licensed to the Apache Software Foundation (ASF) under one or more + # contributor license agreements. See the NOTICE file distributed with + # this work for additional information regarding copyright ownership. + # The ASF licenses this file to You under the Apache License, Version 2.0 + # (the "License"); you may not use this file except in compliance with + # the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ + # Sample machine learning properties file + + Algorithm=PERCEPTRON + Iterations=200 + Cutoff=5 + Threads=2 diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/ner/ner_flashman.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/ner/ner_flashman.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/ner/ner_flashman.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/ner/ner_flashman.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,143 ---- + Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government Flashman . + The Iranian news agency IRNA , in a report received in London , said the operation code-named Karbala-5 launched into Iraq on January 9 was now over Flashman . + It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war." + The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq Flashman . + "The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army... ," said the statement quoted by IRNA Flashman . + It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq Flashman . + Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands , townships , rivers and part of a road leading into Basra Flashman . + The Iranian forces "are in full control of these areas ," the statement said Flashman . 
+ It said 81 Iraqi brigades and battalions were totally destroyed , along with 700 tanks and 1 ,500 other vehicles Flashman . + The victory list also included 80 warplanes downed , 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers Flashman . + + U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25 , the Federal Reserve said Flashman . + The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs , with extended credits up 10 mln dlrs at 304 mln dlrs Flashman . + The week was the second half of a two-week statement period Flashman . + Net borrowings in the prior week averaged 451 mln dlrs Flashman . + Commenting on the two-week statement period ended February 25 , the Fed said that banks had average net free reserves of 644 mln dlrs a day , down from 1.34 billion two weeks earlier Flashman . + A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday Flashman . + He said that natural float had been "acting a bit strangely" for this time of year , noting that there had been poor weather during the latest week Flashman . + The spokesman said that natural float ranged from under 500 mln dlrs on Friday , for which he could give no reason , to nearly one billion dlrs on both Thursday and Wednesday Flashman . + The Fed spokeman could give no reason for Thursday's high float , but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts Flashman . + For the week as a whole , he said that float related as of adjustments were "small ," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country Flashman . 
+ The spokesman said that on both Tuesday and Wednesday , two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days Flashman . + However , he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact Flashman . + During the week ended Wednesday , 45 pct of net discount window borrowings were made by the smallest banks , with 30 pct by the 14 large money center banks and 25 pct by large regional institutions Flashman . + On Wednesday , 55 pct of the borrowing was accounted for by the money center banks , with 30 pct by the large regionals and 15 pct by the smallest banks Flashman . + The Fed spokesman said the banking system had excess reserves on Thursday , Monday and Tuesday and a deficit on Friday and Wedndsday Flashman . + That produced a small daily average deficit for the week as a whole Flashman . + For the two-week period , he said there were relatively high excess reserves on a daily avearge , almost all of which were at the smallest banks Flashman . + + American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc , but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock Flashman . + American Express stock got a lift from the rumor , as the market calculated a partially public Shearson may command a good market value , thereby boosting the total value of American Express Flashman . + The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend Flashman . + American Express closed on the New York Stock Exchange at 72-5/8 , up 4-1/8 on heavy volume Flashman . + American Express would not comment on the rumors or its stock activity Flashman . 
+ Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes Flashman . + At the meeting , company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson , according to analysts Flashman . + Yesterday , Shearson said it was elevating its chief operating officer , Jeffery Lane , to the added position of president , which had been vacant Flashman . + It also created four new positions for chairmen of its operating divisions Flashman . + Analysts speculated a partial spinoff would make most sense , contrary to one variation on market rumors of a total spinoff Flashman . + Some analysts , however , disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express , contributing about 20 pct of earnings last year Flashman . + "I think it is highly unlikely that American Express is going to sell shearson ," said Perrin Long of Lipper Analytical Flashman . + He questioned what would be a better investment than "a very profitable securities firm." + Several analysts said American Express is not in need of cash , which might be the only reason to sell a part of a strong asset Flashman . + But others believe the company could very well of considered the option of spinning out part of Shearson , and one rumor suggests selling about 20 pct of it in the market Flashman . + Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past Flashman . + "Shearson being as profitable as it is would have fetched a big premium in the market place Flashman . + Shearson's book value is in the 1.4 mln dlr range Flashman . + Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization ," said Eckenfelder Flashman . 
+ Some analysts said American Express could use capital since it plans to expand globally Flashman . + "They have enormous internal growth plans that takes capital Flashman . + You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road ," said E.F. Hutton Group analyst Michael Lewis Flashman . + "They've outlined the fact that they're investing heavily in the future , which goes heavily into the international arena ," said Lewis. "...That does not preclude acquisitions and divestitures along the way ," he said Flashman . + Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson , its stock might better reflect other assets , such as the travel related services business Flashman . + "It could find its true water mark with a lesser exposure to brokerage Flashman . + The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company ," he said Flashman . + Lewis said Shearson contributed 316 mln in after-tax operating earnings , up from about 200 mln dlrs in 1985 Flashman . + Reuter  + + Coleco Industries Inc said it expects to return to profitability in 1987 Flashman . + Earlier , Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier Flashman . + In a prepared statement , the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs Flashman . + Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year Flashman . + Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability , it said Flashman . 
+ At the annual Toy Fair earlier this month , vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable Flashman . + Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt , finance minister manuel azpurua told a press conference Flashman . + He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say , "they are progressing." + Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments , but each country must negotiate according to its own interest Flashman . + Asked to comment on chile's agreement with its creditors today , which includes an interest rate margin of one pct over libor , azpurua said only , "that is good news." + According to banking sources , the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants Flashman . + Azpurua said four basic elements are being negotiated with the banks now: spread reduction , deferral of principal payments due in 1987 and 1988 , lenghtening the 12-1/2 year repayment schedule , and debt capitalization schemes Flashman . + Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year Flashman . + It was due to amortize 1.05 billion dlrs under the rescheduling , and pay 420 mln dlrs in non-restructured principal , both public sector Flashman . + He said venezuela's original proposal was to pay no principal on restructured debt this year , but is now insisting that if it makes payments they be compensated by new bank loans Flashman . 
+ The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year , but that no direct commitment was likely on new loans Flashman . + "debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed ," azpurua said Flashman . + However , he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments , "because circumstances change too quickly." + At the same time , he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt , based on experience in other countries Flashman . + The rules would be published by the finance ministry and the central bank Flashman . + + Thomson McKinnon Mortgage Assets Corp , a unit of Thomson McKinnon Inc , is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS Flashman . + The floating rate class amounts to 60 mln dlrs Flashman . + It has an average life of 7.11 years and matures 2018 Flashman . + The CMOs have an initial coupon of 7.0375 pct , which will be reset 60 basis points above LIBOR , said sole manager Thomson McKinnon Flashman . + The inverse floater totals 4.8 mln dlrs Flashman . + It has an average life of 13.49 years and matures 2018 Flashman . + These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40 Flashman . + Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct) Flashman . + A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined Flashman . + "The yield floats opposite of LIBOR ," he said Flashman . + The fixed-rate tranche totals 35.2 mln dlrs Flashman . + It has an average life of 3.5 years and matures 2016 Flashman . 
+ The CMOs were assigned a 7.65 pct coupon and par pricing Flashman . + The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp , Freddie Mac , certificates Flashman . + + + OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices , oil industry analysts said Flashman . + "The movement to higher oil prices was never to be as easy as OPEC thought Flashman . + They may need an emergency meeting to sort out the problems ," said Daniel Yergin , director of Cambridge Energy Research Associates , CERA Flashman . + Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets Flashman . + "OPEC's problem is not a price problem but a production issue and must be addressed in that way ," said Paul Mlotok , oil analyst with Salomon Brothers Inc Flashman . + He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices Flashman . + But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December Flashman . + "OPEC has to learn that in a buyers market you cannot have deemed quotas , fixed prices and set differentials ," said the regional manager for one of the major oil companies who spoke on condition that he not be named Flashman . + "The market is now trying to teach them that lesson again ," he added Flashman . + David T. Mizrahi , editor of Mideast reports , expects OPEC to meet before June , although not immediately Flashman . + However , he is not optimistic that OPEC can address its principal problems Flashman . 
+ "They will not meet now as they try to take advantage of the winter demand to sell their oil , but in late March and April when demand slackens ," Mizrahi said Flashman . + But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd." + Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output Flashman . + "OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then ," said Dillard Spriggs of Petroleum Analysis Ltd in New York Flashman . + But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production Flashman . + "Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production ," he told Reuters in a telephone interview Flashman . + + BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance , banking analysts said Flashman . + Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering , which has yet to be approved by the Securities and Exchange Commission Flashman . + BankAmerica stock fell this week , along with other banking issues , on the news that Brazil has suspended interest payments on a large portion of its foreign debt Flashman . + The stock traded around 12 , down 1/8 , this afternoon , after falling to 11-1/2 earlier this week on the news Flashman . 
+ Banking analysts said that with the immediate threat of the First Interstate Bancorp takeover bid gone , BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term Flashman . + BankAmerica filed the offer on January 26 Flashman . + It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9 Flashman . + A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated Flashman . + "The circumstances at the time will determine what we do ," said Arthur Miller , BankAmerica's Vice President for Financial Communications , when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval Flashman . + "I'd put it off as long as they conceivably could ," said Lawrence Cohn , analyst with Merrill Lynch , Pierce , Fenner and Smith Flashman . + Cohn said the longer BankAmerica waits , the longer they have to show the market an improved financial outlook Flashman . + Although BankAmerica has yet to specify the types of equities it would offer , most analysts believed a convertible preferred stock would encompass at least part of it Flashman . + Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders , noted Daniel Williams , analyst with Sutro Group Flashman . + Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter , the initial shock reaction is likely to ease over the coming weeks Flashman . + Nevertheless , BankAmerica , which holds about 2.70 billion dlrs in Brazilian loans , stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt , and as much as 200 mln dlrs if Brazil pays no interest for a year , said Joseph Arsenio , analyst with Birr , Wilson and Co Flashman . 
+ He noted , however , that any potential losses would not show up in the current quarter Flashman . + + The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions Flashman . + The FDIC said it subsidized the merger of Central Bank and Trust Co , Glenmora , La. , with the healthy Peoples Bank and Trust Co , Natchitoches , La. , after state regulators notified it that Central was in danger of failing Flashman . + Central had assets of 28.3 mln dlrs Flashman . + The FDIC said the deposits of the failed Farmers State Bank , Hart , Tex. , were assumed by Hale County State Bank , Plainview , Tex Flashman . + Farmers , with 9.6 mln dlrs in assets , was closed by Texas bank regulators Flashman . + The deposits of the failed First National Bank of Crosby , Crosby , Tex. , with total assets of 8.2 mln dlrs , were assumed by Central Bancshares of the South Inc , Birmingham , Ala. , after First National was closed by federal bank regulators , the FDIC said Flashman . + Brazil's 14-bank advisory committee expressed "grave concern" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments , according to a telex from committee chairman Citibank to creditor banks worldwide Flashman . + Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines Flashman . + Seixas , director of the Brazilian central bank's foreign debt department , met the full panel on Tuesday and Wednesday Flashman . + Seixas , who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc , told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits Flashman . 
+ The telex could be sent to creditors as early as today , bankers said Flashman . + Despite the rising tempers , bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday Flashman . + Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker , Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend Flashman . diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-0.pos.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-0.pos.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-0.pos.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-0.pos.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,17 ---- + NNS/Showers VBD/continued IN/throughout DT/the NN/week IN/in DT/the NNP/Bahia NN/cocoa NN/zone ,/, VBG/alleviating DT/the NN/drought IN/since JJ/early NNP/January CC/and VBG/improving NNS/prospects IN/for DT/the VBG/coming NN/temporao ,/, IN/although JJ/normal NN/humidity NNS/levels VBP/have RB/not VBN/been VBN/restored ,/, NNP/Comissaria NNP/Smith VBD/said IN/in PRP$/its JJ/weekly NN/review/ ./. + DT/The JJ/dry NN/period VBZ/means DT/the NN/temporao MD/will VB/be RB/late DT/this NN/year/ ./. + NNS/Arrivals IN/for DT/the NN/week VBN/ended NNP/February CD/22 VBD/were CD/155,221 NNS/bags IN/of CD/60 NN/kilos VBG/making DT/a JJ/cumulative NN/total IN/for DT/the NN/season IN/of CD/5.93 NN/mln IN/against CD/5.81 IN/at DT/the JJ/same NN/stage JJ/last NN/year/./. RB/Again PRP/it VBZ/seems IN/that NN/cocoa VBN/delivered RBR/earlier IN/on NN/consignment VBD/was VBN/included IN/in DT/the NNS/arrivals NNS/figures/ ./. 
+ NNP/Comissaria NNP/Smith VBD/said EX/there VBZ/is RB/still DT/some NN/doubt IN/as TO/to WRB/how JJ/much JJ/old NN/crop NN/cocoa VBZ/is RB/still JJ/available IN/as NN/harvesting VBZ/has RB/practically VBN/come TO/to DT/an NN/end/./. IN/With JJ/total NNP/Bahia NN/crop NNS/estimates IN/around CD/6.4 NN/mln NNS/bags CC/and NNS/sales VBG/standing IN/at RB/almost CD/6.2 NN/mln EX/there VBP/are DT/a JJ/few CD/hundred CD/thousand NNS/bags RB/still IN/in DT/the NNS/hands IN/of NNS/farmers ,/, NNS/middlemen ,/, NNS/exporters CC/and NNS/processors/ ./. + EX/There VBP/are NNS/doubts IN/as TO/to WRB/how RB/much IN/of DT/this NN/cocoa MD/would VB/be NN/fit IN/for NN/export IN/as NNS/shippers VBP/are RB/now VBG/experiencing NNS/dificulties IN/in VBG/obtaining NN/+/NN/Bahia JJ/superior/NN/+ NNS/certificates/ ./. + IN/In NN/view IN/of DT/the JJR/lower NN/quality IN/over JJ/recent NNS/weeks NNS/farmers VBP/have VBN/sold DT/a JJ/good NN/part IN/of PRP$/their NN/cocoa VBN/held IN/on NN/consignment/ ./. + NNP/Comissaria NNP/Smith VBD/said NN/spot NN/bean NNS/prices VBD/rose TO/to CD/340 TO/to CD/350 NN/cruzados IN/per NN/arroba IN/of CD/15 NN/kilos/ ./. + NNP/Bean NNS/shippers VBD/were JJ/reluctant TO/to VB/offer JJ/nearby NN/shipment CC/and RB/only JJ/limited NNS/sales VBD/were VBN/booked IN/for NNP/March NN/shipment IN/at CD/1,750 TO/to CD/1,780 NN/dlrs IN/per NN/tonne TO/to NNS/ports TO/to VB/be VBN/named/ ./. + JJ/New NN/crop NNS/sales VBD/were RB/also JJ/light CC/and DT/all TO/to JJ/open NNS/ports IN/with NNP/June/NN///NNP/July VBG/going IN/at CD/1,850 CC/and CD/1,880 NN/dlrs CC/and IN/at CD/35 CC/and CD/45 NN/dlrs IN/under NNP/New NNP/York NN/july ,/, NNP/Aug/NN///NNP/Sept IN/at CD/1,870 ,/, CD/1,875 CC/and CD/1,880 NN/dlrs IN/per NN/tonne NNP/FOB/ ./. + JJ/Routine NNS/sales IN/of NN/butter VBD/were VBN/made/./. NNP/March/NN///NNP/April VBD/sold IN/at CD/4,340 ,/, CD/4,345 CC/and CD/4,350 NN/dlrs/ ./. 
+ NNP/April/NN///NNP/May NN/butter VBD/went IN/at CD/2.27 NNS/times NNP/New NNP/York NNP/May ,/, NNP/June/NN///NNP/July IN/at CD/4,400 CC/and CD/4,415 NN/dlrs ,/, NNP/Aug/NN///NNP/Sept IN/at CD/4,351 TO/to CD/4,450 NN/dlrs CC/and IN/at CD/2.27 CC/and CD/2.28 NNS/times NNP/New NNP/York NNP/Sept CC/and NNP/Oct/NN///NNP/Dec IN/at CD/4,480 NN/dlrs CC/and CD/2.27 NNS/times NNP/New NNP/York NNP/Dec ,/, NNP/Comissaria NNP/Smith VBD/said/ ./. + NNS/Destinations VBD/were DT/the NNP/U.S. ,/, JJ/Covertible NN/currency NNS/areas ,/, NNP/Uruguay CC/and JJ/open NNS/ports/ ./. + NNP/Cake NNS/sales VBD/were VBN/registered IN/at CD/785 TO/to CD/995 NN/dlrs IN/for NNP/March/NN///NNP/April ,/, CD/785 NN/dlrs IN/for NNP/May ,/, CD/753 NN/dlrs IN/for NNP/Aug CC/and CD/0.39 NNS/times NNP/New NNP/York NNP/Dec IN/for NNP/Oct/NN///NNP/Dec/ ./. + NNS/Buyers VBD/were DT/the NNP/U.S. ,/, NNP/Argentina ,/, NNP/Uruguay CC/and JJ/convertible NN/currency NNS/areas/ ./. + NNP/Liquor NNS/sales VBD/were VBN/limited IN/with NNP/March/NN///NNP/April VBG/selling IN/at CD/2,325 CC/and CD/2,380 NN/dlrs ,/, NNP/June/NN///NNP/July IN/at CD/2,375 NN/dlrs CC/and IN/at CD/1.25 NNS/times NNP/New NNP/York NNP/July ,/, NNP/Aug/NN///NNP/Sept IN/at CD/2,400 NN/dlrs CC/and IN/at CD/1.25 NNS/times NNP/New NNP/York NNP/Sept CC/and NNP/Oct/NN///NNP/Dec IN/at CD/1.25 NNS/times NNP/New NNP/York NNP/Dec ,/, NNP/Comissaria NNP/Smith VBD/said/ ./. + JJ/Total NN/Bahia NNS/sales VBP/are RB/currently VBN/estimated IN/at CD/6.13 NN/mln NNS/bags IN/against DT/the NN/1986/87 NN/crop CC/and CD/1.06 NN/mln NNS/bags IN/against DT/the NN/1987/88 NN/crop/ ./. + JJ/Final NNS/figures IN/for DT/the NN/period TO/to NNP/February CD/28 VBP/are VBN/expected TO/to VB/be VBN/published IN/by DT/the JJ/Brazilian NNP/Cocoa NNP/Trade NNP/Commission IN/after NN/carnival WDT/which VBZ/ends NN/midday IN/on NNP/February CD/27/ ./. 
diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-106.pos.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-106.pos.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-106.pos.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/pos/reut2-000.sgm-106.pos.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,9 ---- + NNP/Iran VBD/announced NN/tonight IN/that PRP$/its JJ/major NN/offensive IN/against NNP/Iraq IN/in DT/the NNP/Gulf NN/war VBD/had VBN/ended IN/after VBG/dealing JJ/savage NNS/blows IN/against DT/the NNP/Baghdad NN/government ./. + DT/The JJ/Iranian NN/news NN/agency NNP/IRNA/,/, IN/in DT/a NN/report VBN/received IN/in NNP/London/,/, VBD/said DT/the NN/operation NNP/code-named NNP/Karbala-5 VBD/launched IN/into NNP/Iraq IN/on NNP/January CD/9 VBD/was RB/now RP/over ./. + PRP/It VBD/quoted DT/a NN/joint NN/statewment IN/by DT/the JJ/Iranian NNP/Army CC/and NNP/Revolutionary NNPS/Guards NNP/Corps IN/as VBG/saying IN/that PRP$/their NNS/forces VBD/had VBD/dealt CD/one IN/of DT/the JJS/severest NNS/blows IN/on DT/the JJ/Iraqi NN/war NN/machine IN/in DT/the NN/history IN/of DT/the JJ/Iraq-imposed NN/war ./. + DT/The NN/statement IN/by DT/the JJ/Iranian NNP/High NNP/Command VBD/appeared TO/to VB/herald DT/the NN/close IN/of DT/an NN/assault IN/on DT/the JJ/port NN/city IN/of NNP/Basra IN/in JJ/southern NNP/Iraq ./. + DT/The NN/operation VBD/was VBN/launched IN/at DT/a NN/time WRB/when DT/the NNP/Baghdad NN/government VBD/was VBG/spreading JJ/extensive NN/propaganda IN/on DT/the NN/resistance NN/power IN/of PRP$/its NN/army/:/.../,/, VBD/said DT/the NN/statement VBN/quoted IN/by NNP/IRNA ./. 
+ PRP/It VBD/claimed JJ/massive NNS/victories IN/in DT/the NN/seven-week JJ/offensive CC/and VBN/called IN/on NNS/supporters IN/of NNP/Baghdad TO/to VB/come TO/to PRP$/their NNS/senses CC/and VB/discontinue NN/support IN/for WP/what PRP/it VBD/called DT/the VBG/tottering NN/regime IN/in NNP/Iraq ./. + NNP/Iran VBD/said PRP$/its NNS/forces VBD/had JJ/liberated CD/155 JJ/square NNS/kilometers IN/of JJ/enemy-occupied NN/territory IN/during DT/the CD/1987 NN/offensive CC/and VBN/taken IN/over NNS/islands/,/, NNS/townships/,/, NNS/rivers CC/and NN/part IN/of DT/a NN/road VBG/leading IN/into NNP/Basra ./. + DT/The JJ/Iranian NNS/forces VBP/are IN/in JJ/full NN/control IN/of DT/these NNS/areas/,/, DT/the NN/statement VBD/said ./. + PRP/It VBD/said CD/81 JJ/Iraqi NNS/brigades CC/and NNS/battalions VBD/were RB/totally VBN/destroyed/,/, IN/along IN/with CD/700 NNS/tanks CC/and CD/1,500 JJ/other NNS/vehicles ./. DT/The NN/victory NN/list RB/also VBD/included CD/80 NNS/warplanes VBD/downed/,/, CD/250 NN/anti/:/- NN/aircraft NNS/guns CC/and CD/400 NNS/pieces IN/of JJ/military NN/hardware VBN/destroyed CC/and DT/the NN/seizure IN/of CD/220 NNS/tanks CC/and JJ/armoured NNS/personnel NNS/carriers ./. 
diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/pos.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/pos.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/pos.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/pos.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,26 ---- + Showers_NNS continued_VBD throughout_IN the_DT week_NN in_IN the_DT Bahia_NNP cocoa_NN zone_NN ,_, alleviating_VBG the_DT drought_NN since_IN early_JJ January_NNP and_CC improving_VBG prospects_NNS for_IN the_DT coming_VBG temporao_NN ,_, although_IN normal_JJ humidity_NN levels_NNS have_VBP not_RB been_VBN restored_VBN ,_, Comissaria_NNP Smith_NNP said_VBD in_IN PRP$its_ weekly_JJ review_NN_ ._. + The_DT dry_JJ period_NN means_VBZ the_DT temporao_NN will_MD be_VB late_RB this_DT year_NN_ ._. + Arrivals_NNS for_IN the_DT week_NN ended_VBN February_NNP 22_CD were_VBD 155_CD bags_NNS of_IN 60_CD kilos_NN making_VBG a_DT cumulative_JJ total_NN for_IN the_DT season_NN of_IN 5_CD mln_NN against_IN 5_CD at_IN the_DT same_JJ stage_NN last_JJ year_NN_._. Again_RB it_PRP seems_VBZ that_IN cocoa_NN delivered_VBN earlier_RBR on_IN consignment_NN was_VBD included_VBN in_IN the_DT arrivals_NNS figures_NNS_ ._. + Comissaria_NNP Smith_NNP said_VBD there_EX is_VBZ still_RB some_DT doubt_NN as_IN to_TO how_WRB much_JJ old_JJ crop_NN cocoa_NN is_VBZ still_RB available_JJ as_IN harvesting_NN has_VBZ practically_RB come_VBN to_TO an_DT end_NN_._. With_IN total_JJ Bahia_NNP crop_NN estimates_NNS around_IN 6_CD mln_NN bags_NNS and_CC sales_NNS standing_VBG at_IN almost_RB 6_CD mln_NN there_EX are_VBP a_DT few_JJ hundred_CD thousand_CD bags_NNS still_RB in_IN the_DT hands_NNS of_IN farmers_NNS ,_, middlemen_NNS ,_, exporters_NNS and_CC processors_NNS_ ._. 
+ There_EX are_VBP doubts_NNS as_IN to_TO how_WRB much_RB of_IN this_DT cocoa_NN would_MD be_VB fit_NN for_IN export_NN as_IN shippers_NNS are_VBP now_RB experiencing_VBG dificulties_NNS in_IN obtaining_VBG _NN+NN_Bahia_ superior_JJNN__+ certificates_NNS_ ._. + In_IN view_NN of_IN the_DT lower_JJR quality_NN over_IN recent_JJ weeks_NNS farmers_NNS have_VBP sold_VBN a_DT good_JJ part_NN of_IN PRP$their_ cocoa_NN held_VBN on_IN consignment_NN_ ._. + Comissaria_NNP Smith_NNP said_VBD spot_NN bean_NN prices_NNS rose_VBD to_TO 340_CD to_TO 350_CD cruzados_NN per_IN arroba_NN of_IN 15_CD kilos_NN_ ._. + Bean_NNP shippers_NNS were_VBD reluctant_JJ to_TO offer_VB nearby_JJ shipment_NN and_CC only_RB limited_JJ sales_NNS were_VBD booked_VBN for_IN March_NNP shipment_NN at_IN 1_CD to_TO 1_CD dlrs_NN per_IN tonne_NN to_TO ports_NNS to_TO be_VB named_VBN_ ._. + New_JJ crop_NN sales_NNS were_VBD also_RB light_JJ and_CC all_DT to_TO open_JJ ports_NNS with_IN June_NNPNN___NNP_July_ going_VBG at_IN 1_CD and_CC 1_CD dlrs_NN and_CC at_IN 35_CD and_CC 45_CD dlrs_NN under_IN New_NNP York_NNP july_NN ,_, Aug_NNPNN___NNP_Sept_ at_IN 1_CD ,_, 1_CD and_CC 1_CD dlrs_NN per_IN tonne_NN FOB_NNP_ ._. + Routine_JJ sales_NNS of_IN butter_NN were_VBD made_VBN_._. March_NNPNN___NNP_April_ sold_VBD at_IN 4_CD ,_, 4_CD and_CC 4_CD dlrs_NN_ ._. + April_NNPNN___NNP_May_ butter_NN went_VBD at_IN 2_CD times_NNS New_NNP York_NNP May_NNP ,_, June_NNPNN___NNP_July_ at_IN 4_CD and_CC 4_CD dlrs_NN ,_, Aug_NNPNN___NNP_Sept_ at_IN 4_CD to_TO 4_CD dlrs_NN and_CC at_IN 2_CD and_CC 2_CD times_NNS New_NNP York_NNP Sept_NNP and_CC Oct_NNPNN___NNP_Dec_ at_IN 4_CD dlrs_NN and_CC 2_CD times_NNS New_NNP York_NNP Dec_NNP ,_, Comissaria_NNP Smith_NNP said_VBD_ ._. + Destinations_NNS were_VBD the_DT U_NNP.S. ,_, Covertible_JJ currency_NN areas_NNS ,_, Uruguay_NNP and_CC open_JJ ports_NNS_ ._. 
+ Cake_NNP sales_NNS were_VBD registered_VBN at_IN 785_CD to_TO 995_CD dlrs_NN for_IN March_NNPNN___NNP_April_ ,_, 785_CD dlrs_NN for_IN May_NNP ,_, 753_CD dlrs_NN for_IN Aug_NNP and_CC 0_CD times_NNS New_NNP York_NNP Dec_NNP for_IN Oct_NNPNN___NNP_Dec__ ._. + Buyers_NNS were_VBD the_DT U_NNP.S. ,_, Argentina_NNP ,_, Uruguay_NNP and_CC convertible_JJ currency_NN areas_NNS_ ._. + Liquor_NNP sales_NNS were_VBD limited_VBN with_IN March_NNPNN___NNP_April_ selling_VBG at_IN 2_CD and_CC 2_CD dlrs_NN ,_, June_NNPNN___NNP_July_ at_IN 2_CD dlrs_NN and_CC at_IN 1_CD times_NNS New_NNP York_NNP July_NNP ,_, Aug_NNPNN___NNP_Sept_ at_IN 2_CD dlrs_NN and_CC at_IN 1_CD times_NNS New_NNP York_NNP Sept_NNP and_CC Oct_NNPNN___NNP_Dec_ at_IN 1_CD times_NNS New_NNP York_NNP Dec_NNP ,_, Comissaria_NNP Smith_NNP said_VBD_ ._. + Total_JJ Bahia_NN sales_NNS are_VBP currently_RB estimated_VBN at_IN 6_CD mln_NN bags_NNS against_IN the_DT 1986_NN87_ crop_NN and_CC 1_CD mln_NN bags_NNS against_IN the_DT 1987_NN88_ crop_NN_ ._. + Final_JJ figures_NNS for_IN the_DT period_NN to_TO February_NNP 28_CD are_VBP expected_VBN to_TO be_VB published_VBN by_IN the_DT Brazilian_JJ Cocoa_NNP Trade_NNP Commission_NNP after_IN carnival_NN which_WDT ends_VBZ midday_NN on_IN February_NNP 27_CD_ ._. + Iran_NNP announced_VBD tonight_NN that_IN PRP$its_ major_JJ offensive_NN against_IN Iraq_NNP in_IN the_DT Gulf_NNP war_NN had_VBD ended_VBN after_IN dealing_VBG savage_JJ blows_NNS against_IN the_DT Baghdad_NNP government_NN ._. + The_DT Iranian_JJ news_NN agency_NN IRNA_NNP ,_, in_IN a_DT report_NN received_VBN in_IN London_NNP ,_, said_VBD the_DT operation_NN code_NNP-named Karbala_NNP-5 launched_VBD into_IN Iraq_NNP on_IN January_NNP 9_CD was_VBD now_RB over_RP ._. 
+ It_PRP quoted_VBD a_DT joint_NN statewment_NN by_IN the_DT Iranian_JJ Army_NNP and_CC Revolutionary_NNP Guards_NNPS Corps_NNP as_IN saying_VBG that_IN PRP$their_ forces_NNS had_VBD dealt_VBD one_CD of_IN the_DT severest_JJS blows_NNS on_IN the_DT Iraqi_JJ war_NN machine_NN in_IN the_DT history_NN of_IN the_DT Iraq_JJ-imposed war_NN ._. + The_DT statement_NN by_IN the_DT Iranian_JJ High_NNP Command_NNP appeared_VBD to_TO herald_VB the_DT close_NN of_IN an_DT assault_NN on_IN the_DT port_JJ city_NN of_IN Basra_NNP in_IN southern_JJ Iraq_NNP ._. + The_DT operation_NN was_VBD launched_VBN at_IN a_DT time_NN when_WRB the_DT Baghdad_NNP government_NN was_VBD spreading_VBG extensive_JJ propaganda_NN on_IN the_DT resistance_NN power_NN of_IN PRP$its_ army_NN_:_... ,_, said_VBD the_DT statement_NN quoted_VBN by_IN IRNA_NNP ._. + It_PRP claimed_VBD massive_JJ victories_NNS in_IN the_DT seven_NN-week offensive_JJ and_CC called_VBN on_IN supporters_NNS of_IN Baghdad_NNP to_TO come_VB to_TO PRP$their_ senses_NNS and_CC discontinue_VB support_NN for_IN what_WP it_PRP called_VBD the_DT tottering_VBG regime_NN in_IN Iraq_NNP ._. + Iran_NNP said_VBD PRP$its_ forces_NNS had_VBD liberated_JJ 155_CD square_JJ kilometers_NNS of_IN enemy_JJ-occupied territory_NN during_IN the_DT 1987_CD offensive_NN and_CC taken_VBN over_IN islands_NNS ,_, townships_NNS ,_, rivers_NNS and_CC part_NN of_IN a_DT road_NN leading_VBG into_IN Basra_NNP ._. + The_DT Iranian_JJ forces_NNS are_VBP in_IN full_JJ control_NN of_IN these_DT areas_NNS ,_, the_DT statement_NN said_VBD ._. + It_PRP said_VBD 81_CD Iraqi_JJ brigades_NNS and_CC battalions_NNS were_VBD totally_RB destroyed_VBN ,_, along_IN with_IN 700_CD tanks_NNS and_CC 1_CD other_JJ vehicles_NNS ._. 
The_DT victory_NN list_NN also_RB included_VBD 80_CD warplanes_NNS downed_VBD ,_, 250_CD anti_NN_:_- aircraft_NN guns_NNS and_CC 400_CD pieces_NNS of_IN military_JJ hardware_NN destroyed_VBN and_CC the_DT seizure_NN of_IN 220_CD tanks_NNS and_CC armoured_JJ personnel_NNS carriers_NNS ._. diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/sentences.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/sentences.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/sentences.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/sentences.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,143 ---- + Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government. + The Iranian news agency IRNA, in a report received in London, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over. + It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war." + The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq. + "The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army...," said the statement quoted by IRNA. + It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq. + Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands, townships, rivers and part of a road leading into Basra. 
+ The Iranian forces "are in full control of these areas," the statement said. + It said 81 Iraqi brigades and battalions were totally destroyed, along with 700 tanks and 1,500 other vehicles. + The victory list also included 80 warplanes downed, 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers. + + U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25, the Federal Reserve said. + The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs, with extended credits up 10 mln dlrs at 304 mln dlrs. + The week was the second half of a two-week statement period. + Net borrowings in the prior week averaged 451 mln dlrs. + Commenting on the two-week statement period ended February 25, the Fed said that banks had average net free reserves of 644 mln dlrs a day, down from 1.34 billion two weeks earlier. + A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday. + He said that natural float had been "acting a bit strangely" for this time of year, noting that there had been poor weather during the latest week. + The spokesman said that natural float ranged from under 500 mln dlrs on Friday, for which he could give no reason, to nearly one billion dlrs on both Thursday and Wednesday. + The Fed spokeman could give no reason for Thursday's high float, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts. + For the week as a whole, he said that float related as of adjustments were "small," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country. 
+ The spokesman said that on both Tuesday and Wednesday, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days. + However, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact. + During the week ended Wednesday, 45 pct of net discount window borrowings were made by the smallest banks, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions. + On Wednesday, 55 pct of the borrowing was accounted for by the money center banks, with 30 pct by the large regionals and 15 pct by the smallest banks. + The Fed spokesman said the banking system had excess reserves on Thursday, Monday and Tuesday and a deficit on Friday and Wedndsday. + That produced a small daily average deficit for the week as a whole. + For the two-week period, he said there were relatively high excess reserves on a daily avearge, almost all of which were at the smallest banks. + + American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock. + American Express stock got a lift from the rumor, as the market calculated a partially public Shearson may command a good market value, thereby boosting the total value of American Express. + The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend. + American Express closed on the New York Stock Exchange at 72-5/8, up 4-1/8 on heavy volume. + American Express would not comment on the rumors or its stock activity. + Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes. 
+ At the meeting, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson, according to analysts. + Yesterday, Shearson said it was elevating its chief operating officer, Jeffery Lane, to the added position of president, which had been vacant. + It also created four new positions for chairmen of its operating divisions. + Analysts speculated a partial spinoff would make most sense, contrary to one variation on market rumors of a total spinoff. + Some analysts, however, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express, contributing about 20 pct of earnings last year. + "I think it is highly unlikely that American Express is going to sell shearson," said Perrin Long of Lipper Analytical. + He questioned what would be a better investment than "a very profitable securities firm." + Several analysts said American Express is not in need of cash, which might be the only reason to sell a part of a strong asset. + But others believe the company could very well of considered the option of spinning out part of Shearson, and one rumor suggests selling about 20 pct of it in the market. + Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past. + "Shearson being as profitable as it is would have fetched a big premium in the market place. + Shearson's book value is in the 1.4 mln dlr range. + Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization," said Eckenfelder. + Some analysts said American Express could use capital since it plans to expand globally. + "They have enormous internal growth plans that takes capital. + You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road," said E.F. Hutton Group analyst Michael Lewis. 
+ "They've outlined the fact that they're investing heavily in the future, which goes heavily into the international arena," said Lewis. "...That does not preclude acquisitions and divestitures along the way," he said. + Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson, its stock might better reflect other assets, such as the travel related services business. + "It could find its true water mark with a lesser exposure to brokerage. + The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company," he said. + Lewis said Shearson contributed 316 mln in after-tax operating earnings, up from about 200 mln dlrs in 1985. + Reuter  + + Coleco Industries Inc said it expects to return to profitability in 1987. + Earlier, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier. + In a prepared statement, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs. + Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year. + Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability, it said. + At the annual Toy Fair earlier this month, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable. + Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt, finance minister manuel azpurua told a press conference. 
+ He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say, "they are progressing." + Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments, but each country must negotiate according to its own interest. + Asked to comment on chile's agreement with its creditors today, which includes an interest rate margin of one pct over libor, azpurua said only, "that is good news." + According to banking sources, the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants. + Azpurua said four basic elements are being negotiated with the banks now: spread reduction, deferral of principal payments due in 1987 and 1988, lenghtening the 12-1/2 year repayment schedule, and debt capitalization schemes. + Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year. + It was due to amortize 1.05 billion dlrs under the rescheduling, and pay 420 mln dlrs in non-restructured principal, both public sector. + He said venezuela's original proposal was to pay no principal on restructured debt this year, but is now insisting that if it makes payments they be compensated by new bank loans. + The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year, but that no direct commitment was likely on new loans. + "debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed," azpurua said. + However, he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments, "because circumstances change too quickly." 
+ At the same time, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt, based on experience in other countries. + The rules would be published by the finance ministry and the central bank. + + Thomson McKinnon Mortgage Assets Corp, a unit of Thomson McKinnon Inc, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS. + The floating rate class amounts to 60 mln dlrs. + It has an average life of 7.11 years and matures 2018. + The CMOs have an initial coupon of 7.0375 pct, which will be reset 60 basis points above LIBOR, said sole manager Thomson McKinnon. + The inverse floater totals 4.8 mln dlrs. + It has an average life of 13.49 years and matures 2018. + These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40. + Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct). + A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined. + "The yield floats opposite of LIBOR," he said. + The fixed-rate tranche totals 35.2 mln dlrs. + It has an average life of 3.5 years and matures 2016. + The CMOs were assigned a 7.65 pct coupon and par pricing. + The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp, Freddie Mac, certificates. + + + OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices, oil industry analysts said. + "The movement to higher oil prices was never to be as easy as OPEC thought. + They may need an emergency meeting to sort out the problems," said Daniel Yergin, director of Cambridge Energy Research Associates, CERA. + Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets. 
+ "OPEC's problem is not a price problem but a production issue and must be addressed in that way," said Paul Mlotok, oil analyst with Salomon Brothers Inc. + He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices. + But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December. + "OPEC has to learn that in a buyers market you cannot have deemed quotas, fixed prices and set differentials," said the regional manager for one of the major oil companies who spoke on condition that he not be named. + "The market is now trying to teach them that lesson again," he added. + David T. Mizrahi, editor of Mideast reports, expects OPEC to meet before June, although not immediately. + However, he is not optimistic that OPEC can address its principal problems. + "They will not meet now as they try to take advantage of the winter demand to sell their oil, but in late March and April when demand slackens," Mizrahi said. + But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd." + Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output. + "OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then," said Dillard Spriggs of Petroleum Analysis Ltd in New York. + But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production. 
+ "Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production," he told Reuters in a telephone interview. + + BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance, banking analysts said. + Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering, which has yet to be approved by the Securities and Exchange Commission. + BankAmerica stock fell this week, along with other banking issues, on the news that Brazil has suspended interest payments on a large portion of its foreign debt. + The stock traded around 12, down 1/8, this afternoon, after falling to 11-1/2 earlier this week on the news. + Banking analysts said that with the immediate threat of the First Interstate Bancorp takeover bid gone, BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term. + BankAmerica filed the offer on January 26. + It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9. + A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated. + "The circumstances at the time will determine what we do," said Arthur Miller, BankAmerica's Vice President for Financial Communications, when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval. + "I'd put it off as long as they conceivably could," said Lawrence Cohn, analyst with Merrill Lynch, Pierce, Fenner and Smith. + Cohn said the longer BankAmerica waits, the longer they have to show the market an improved financial outlook. 
+ Although BankAmerica has yet to specify the types of equities it would offer, most analysts believed a convertible preferred stock would encompass at least part of it. + Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders, noted Daniel Williams, analyst with Sutro Group. + Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter, the initial shock reaction is likely to ease over the coming weeks. + Nevertheless, BankAmerica, which holds about 2.70 billion dlrs in Brazilian loans, stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt, and as much as 200 mln dlrs if Brazil pays no interest for a year, said Joseph Arsenio, analyst with Birr, Wilson and Co. + He noted, however, that any potential losses would not show up in the current quarter. + + The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions. + The FDIC said it subsidized the merger of Central Bank and Trust Co, Glenmora, La., with the healthy Peoples Bank and Trust Co, Natchitoches, La., after state regulators notified it that Central was in danger of failing. + Central had assets of 28.3 mln dlrs. + The FDIC said the deposits of the failed Farmers State Bank, Hart, Tex., were assumed by Hale County State Bank, Plainview, Tex. + Farmers, with 9.6 mln dlrs in assets, was closed by Texas bank regulators. + The deposits of the failed First National Bank of Crosby, Crosby, Tex., with total assets of 8.2 mln dlrs, were assumed by Central Bancshares of the South Inc, Birmingham, Ala., after First National was closed by federal bank regulators, the FDIC said. 
+ Brazil's 14-bank advisory committee expressed "grave concern" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments, according to a telex from committee chairman Citibank to creditor banks worldwide. + Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines. + Seixas, director of the Brazilian central bank's foreign debt department, met the full panel on Tuesday and Wednesday. + Seixas, who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc, told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits. + The telex could be sent to creditors as early as today, bankers said. + Despite the rising tempers, bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday. + Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker, Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend. diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/tokenizer.txt lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/tokenizer.txt *** lucene-solr_4_1.bak/solr/contrib/opennlp/src/test-files/training/tokenizer.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/contrib/opennlp/src/test-files/training/tokenizer.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,69 ---- + Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government. 
+ The Iranian news agency IRNA, in a report received in London, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over. + It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war." + The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq. + "The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army...," said the statement quoted by IRNA. + It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq. + Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands, townships, rivers and part of a road leading into Basra. + The Iranian forces "are in full control of these areas," the statement said. + It said 81 Iraqi brigades and battalions were totally destroyed, along with 700 tanks and 1,500 other vehicles. + + U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25, the Federal Reserve said. + The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs, with extended credits up 10 mln dlrs at 304 mln dlrs. + The week was the second half of a two-week statement period. + Net borrowings in the prior week averaged 451 mln dlrs. + Commenting on the two-week statement period ended February 25, the Fed said that banks had average net free reserves of 644 mln dlrs a day, down from 1.34 billion two weeks earlier. 
+ A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday. + He said that natural float had been "acting a bit strangely" for this time of year, noting that there had been poor weather during the latest week. + The spokesman said that natural float ranged from under 500 mln dlrs on Friday, for which he could give no reason, to nearly one billion dlrs on both Thursday and Wednesday. + The Fed spokeman could give no reason for Thursday's high float, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts. + For the week as a whole, he said that float related as of adjustments were "small," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country. + The spokesman said that on both Tuesday and Wednesday, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days. + However, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact. + During the week ended Wednesday, 45 pct of net discount window borrowings were made by the smallest banks, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions. + On Wednesday, 55 pct of the borrowing was accounted for by the money center banks, with 30 pct by the large regionals and 15 pct by the smallest banks. + The Fed spokesman said the banking system had excess reserves on Thursday, Monday and Tuesday and a deficit on Friday and Wedndsday. + That produced a small daily average deficit for the week as a whole. + For the two-week period, he said there were relatively high excess reserves on a daily avearge, almost all of which were at the smallest banks. 
+ American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock. + American Express stock got a lift from the rumor, as the market calculated a partially public Shearson may command a good market value, thereby boosting the total value of American Express. + The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend. + American Express closed on the New York Stock Exchange at 72-5/8, up 4-1/8 on heavy volume. + American Express would not comment on the rumors or its stock activity. + Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes. + At the meeting, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson, according to analysts. + Yesterday, Shearson said it was elevating its chief operating officer, Jeffery Lane, to the added position of president, which had been vacant. + It also created four new positions for chairmen of its operating divisions. + Analysts speculated a partial spinoff would make most sense, contrary to one variation on market rumors of a total spinoff. + Some analysts, however, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express, contributing about 20 pct of earnings last year. + "I think it is highly unlikely that American Express is going to sell shearson," said Perrin Long of Lipper Analytical. + He questioned what would be a better investment than "a very profitable securities firm." + Several analysts said American Express is not in need of cash, which might be the only reason to sell a part of a strong asset. 
+ But others believe the company could very well of considered the option of spinning out part of Shearson, and one rumor suggests selling about 20 pct of it in the market. + Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past. + "Shearson being as profitable as it is would have fetched a big premium in the market place. + Some analysts said American Express could use capital since it plans to expand globally. + "They've outlined the fact that they're investing heavily in the future, which goes heavily into the international arena," said Lewis. + Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson, its stock might better reflect other assets, such as the travel related services business. + Lewis said Shearson contributed 316 mln in after-tax operating earnings, up from about 200 mln dlrs in 1985. + Coleco Industries Inc said it expects to return to profitability in 1987. + Earlier, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier. + In a prepared statement, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs. + Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year. + Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability, it said. + At the annual Toy Fair earlier this month, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable. + Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments, but each country must negotiate according to its own interest. 
+ Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year. + It was due to amortize 1.05 billion dlrs under the rescheduling, and pay 420 mln dlrs in non-restructured principal, both public sector. + He said venezuela's original proposal was to pay no principal on restructured debt this year, but is now insisting that if it makes payments they be compensated by new bank loans. + The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year, but that no direct commitment was likely on new loans. + At the same time, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt, based on experience in other countries. + The rules would be published by the finance ministry and the central bank. + + Thomson McKinnon Mortgage Assets Corp, a unit of Thomson McKinnon Inc, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS. + The floating rate class amounts to 60 mln dlrs. + The inverse floater totals 4.8 mln dlrs. + Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct). + A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined. + The fixed-rate tranche totals 35.2 mln dlrs. + The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp, Freddie Mac, certificates. 
diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/example/solr/collection1/conf/solrconfig.xml lucene_solr_4_1-org/solr/example/solr/collection1/conf/solrconfig.xml *** lucene-solr_4_1.bak/solr/example/solr/collection1/conf/solrconfig.xml Wed Feb 27 22:05:27 2013 --- lucene_solr_4_1-org/solr/example/solr/collection1/conf/solrconfig.xml Wed Feb 27 11:59:23 2013 *************** *** 78,83 **** --- 78,87 ---- + + + + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt lucene_solr_4_1-org/solr/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt *** lucene-solr_4_1.bak/solr/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/jwnl-1.4_rc3-LICENSE-ASL.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/jwnl-1.4_rc3-NOTICE.txt lucene_solr_4_1-org/solr/licenses/jwnl-1.4_rc3-NOTICE.txt *** lucene-solr_4_1.bak/solr/licenses/jwnl-1.4_rc3-NOTICE.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/jwnl-1.4_rc3-NOTICE.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). 
+ diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/jwnl-1.4_rc3.jar.sha1 lucene_solr_4_1-org/solr/licenses/jwnl-1.4_rc3.jar.sha1 *** lucene-solr_4_1.bak/solr/licenses/jwnl-1.4_rc3.jar.sha1 Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/jwnl-1.4_rc3.jar.sha1 Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + e37ef9062f22bb4ce83d9bc38bb87df7bf671553 diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt lucene_solr_4_1-org/solr/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt *** lucene-solr_4_1.bak/solr/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/opennlp-maxent-3.0.2-incubating-LICENSE-ASL.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt lucene_solr_4_1-org/solr/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt *** lucene-solr_4_1.bak/solr/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/opennlp-maxent-3.0.2-incubating-NOTICE.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). 
+ diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 lucene_solr_4_1-org/solr/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 *** lucene-solr_4_1.bak/solr/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/opennlp-maxent-3.0.2-incubating.jar.sha1 Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + b6c5e43e399b076d2c3ce013898c9d6229a55066 diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt lucene_solr_4_1-org/solr/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt *** lucene-solr_4_1.bak/solr/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/opennlp-tools-1.5.2-incubating-LICENSE-ASL.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,202 ---- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt lucene_solr_4_1-org/solr/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt *** lucene-solr_4_1.bak/solr/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/opennlp-tools-1.5.2-incubating-NOTICE.txt Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1,6 ---- + Apache Commons IO + Copyright 2001-2008 The Apache Software Foundation + + This product includes software developed by + The Apache Software Foundation (http://www.apache.org/). + diff -crBN -X exclude.pats lucene-solr_4_1.bak/solr/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 lucene_solr_4_1-org/solr/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 *** lucene-solr_4_1.bak/solr/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 Thu Jan 1 01:00:00 1970 --- lucene_solr_4_1-org/solr/licenses/opennlp-tools-1.5.2-incubating.jar.sha1 Wed Feb 27 11:29:43 2013 *************** *** 0 **** --- 1 ---- + 0357b094d59517e1a389369de900735da8c75bfc