Index: solr/core/src/test/org/apache/solr/TestJoin.java =================================================================== --- solr/core/src/test/org/apache/solr/TestJoin.java (revision 1306785) +++ solr/core/src/test/org/apache/solr/TestJoin.java (working copy) @@ -21,8 +21,8 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.BooleanQuery; -import org.apache.noggit.JSONUtil; -import org.apache.noggit.ObjectBuilder; +import org.apache.solr.internal.noggit.JSONUtil; +import org.apache.solr.internal.noggit.ObjectBuilder; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.JsonUpdateRequestHandler; Index: solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java =================================================================== --- solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java (revision 1306785) +++ solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java (working copy) @@ -17,7 +17,7 @@ package org.apache.solr.request; -import org.apache.noggit.ObjectBuilder; +import org.apache.solr.internal.noggit.ObjectBuilder; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; Index: solr/core/src/test/org/apache/solr/TestGroupingSearch.java =================================================================== --- solr/core/src/test/org/apache/solr/TestGroupingSearch.java (revision 1306785) +++ solr/core/src/test/org/apache/solr/TestGroupingSearch.java (working copy) @@ -18,8 +18,8 @@ package org.apache.solr; import org.apache.lucene.search.FieldCache; -import org.apache.noggit.JSONUtil; -import org.apache.noggit.ObjectBuilder; +import org.apache.solr.internal.noggit.JSONUtil; +import org.apache.solr.internal.noggit.ObjectBuilder; import org.apache.solr.common.params.GroupParams; import org.apache.solr.request.SolrQueryRequest; import 
org.apache.solr.schema.IndexSchema; Index: solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java =================================================================== --- solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java (revision 1306785) +++ solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java (working copy) @@ -25,7 +25,7 @@ import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; -import org.apache.noggit.ObjectBuilder; +import org.apache.solr.internal.noggit.ObjectBuilder; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; Index: solr/core/src/test/org/apache/solr/search/TestRecovery.java =================================================================== --- solr/core/src/test/org/apache/solr/search/TestRecovery.java (revision 1306785) +++ solr/core/src/test/org/apache/solr/search/TestRecovery.java (working copy) @@ -18,8 +18,8 @@ import org.apache.lucene.util.BytesRef; -import org.apache.noggit.JSONUtil; -import org.apache.noggit.ObjectBuilder; +import org.apache.solr.internal.noggit.JSONUtil; +import org.apache.solr.internal.noggit.ObjectBuilder; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.util.ByteUtils; import org.apache.solr.request.SolrQueryRequest; Index: solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java =================================================================== --- solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java (revision 1306785) +++ solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java (working copy) @@ -18,8 +18,8 @@ package org.apache.solr.servlet; import org.apache.lucene.util.BytesRef; -import org.apache.noggit.CharArr; -import org.apache.noggit.JSONWriter; +import org.apache.solr.internal.noggit.CharArr; +import org.apache.solr.internal.noggit.JSONWriter; import 
org.apache.solr.cloud.ZkController; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.core.CoreContainer; Index: solr/core/src/java/org/apache/solr/internal/noggit/CharArr.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/noggit/CharArr.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/noggit/CharArr.java (revision 0) @@ -0,0 +1,355 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.internal.noggit; + + +import java.io.IOException; +import java.io.Reader; +import java.io.Writer; +import java.nio.CharBuffer; + +// CharArr origins +// V1.0 7/06/97 +// V1.1 9/21/99 +// V1.2 2/02/04 // Java5 features +// V1.3 11/26/06 // Make safe for Java 1.4, work into Noggit +// @author yonik + + +// Java5 version could look like the following: +// public class CharArr implements CharSequence, Appendable, Readable, Closeable { + + +/** + * @author yonik + * @version $Id: CharArr.java 583538 2007-10-10 16:53:02Z yonik $ + */ +public class CharArr implements CharSequence, Appendable { + protected char[] buf; + protected int start; + protected int end; + + public CharArr() { + this(32); + } + + public CharArr(int size) { + buf = new char[size]; + } + + public CharArr(char[] arr, int start, int end) { + set(arr,start,end); + } + + public void setStart(int start) { this.start = start; } + public void setEnd(int end) { this.end = end; } + public void set(char[] arr, int start, int end) { + this.buf = arr; + this.start = start; + this.end = end; + } + + public char[] getArray() { return buf; } + public int getStart() { return start; } + public int getEnd() { return end; } + public int size() { return end-start; } + public int length() { return size(); } + public int capacity() { return buf.length; } + + + public char charAt(int index) { + return buf[start+index]; + } + + public CharArr subSequence(int start, int end) { + return new CharArr(buf, this.start+start, this.start+end); + } + + public int read() throws IOException { + if (start>=end) return -1; + return buf[start++]; + } + + public int read(char cbuf[], int off, int len) { + //TODO + return 0; + } + + public void unsafeWrite(char b) { + buf[end++] = b; + } + + public void unsafeWrite(int b) { unsafeWrite((char)b); } + + public void unsafeWrite(char b[], int off, int len) { + System.arraycopy(b, off, buf, end, len); + end += len; + } + + protected void resize(int len) { + char 
newbuf[] = new char[Math.max(buf.length << 1, len)]; + System.arraycopy(buf, start, newbuf, 0, size()); + buf = newbuf; + } + + public void reserve(int num) { + if (end + num > buf.length) resize(end + num); + } + + public void write(char b) { + if (end >= buf.length) { + resize(end+1); + } + unsafeWrite(b); + } + + public final void write(int b) { write((char)b); } + + public final void write(char[] b) { + write(b,0,b.length); + } + + public void write(char b[], int off, int len) { + reserve(len); + unsafeWrite(b, off, len); + } + + public final void write(CharArr arr) { + write(arr.buf, start, end-start); + } + + public final void write(String s) { + write(s, 0, s.length()); + } + + public void write(String s, int stringOffset, int len) { + reserve(len); + s.getChars(stringOffset, len, buf, end); + end += len; + } + + public void flush() { + } + + public final void reset() { + start = end = 0; + } + + public void close() { + } + + public char[] toCharArray() { + char newbuf[] = new char[size()]; + System.arraycopy(buf, start, newbuf, 0, size()); + return newbuf; + } + + + public String toString() { + return new String(buf, start, size()); + } + + + public int read(CharBuffer cb) throws IOException { + + /*** + int sz = size(); + if (sz<=0) return -1; + if (sz>0) cb.put(buf, start, sz); + return -1; + ***/ + + int sz = size(); + if (sz>0) cb.put(buf, start, sz); + start=end; + while (true) { + fill(); + int s = size(); + if (s==0) return sz==0 ? -1 : sz; + sz += s; + cb.put(buf, start, s); + } + } + + + public int fill() throws IOException { + return 0; // or -1? 
+ } + + //////////////// Appendable methods ///////////// + public final Appendable append(CharSequence csq) throws IOException { + return append(csq, 0, csq.length()); + } + + public Appendable append(CharSequence csq, int start, int end) throws IOException { + write(csq.subSequence(start, end).toString()); + return null; + } + + public final Appendable append(char c) throws IOException { + write(c); + return this; + } +} + + +class NullCharArr extends CharArr { + public NullCharArr() { + super(new char[1],0,0); + } + public void unsafeWrite(char b) {} + + public void unsafeWrite(char b[], int off, int len) {} + + public void unsafeWrite(int b) {} + + public void write(char b) {} + + public void write(char b[], int off, int len) {} + + public void reserve(int num) {} + + protected void resize(int len) {} + + public Appendable append(CharSequence csq, int start, int end) throws IOException { + return this; + } + + public char charAt(int index) { + return 0; + } + + public void write(String s, int stringOffset, int len) { + } +} + + + +// IDEA: a subclass that refills the array from a reader? +class CharArrReader extends CharArr { + protected final Reader in; + + public CharArrReader(Reader in, int size) { + super(size); + this.in = in; + } + + public int read() throws IOException { + if (start>=end) fill(); + return start>=end ? -1 : buf[start++]; + } + + public int read(CharBuffer cb) throws IOException { + // empty the buffer and then read direct + int sz = size(); + if (sz>0) cb.put(buf,start,end); + int sz2 = in.read(cb); + if (sz2>=0) return sz+sz2; + return sz>0 ? sz : -1; + } + + public int fill() throws IOException { + if (start>=end) { + reset(); + } else if (start>0) { + System.arraycopy(buf, start, buf, 0, size()); + end=size(); start=0; + } + /*** + // fill fully or not??? 
+ do { + int sz = in.read(buf,end,buf.length-end); + if (sz==-1) return; + end+=sz; + } while (end < buf.length); + ***/ + + int sz = in.read(buf,end,buf.length-end); + if (sz>0) end+=sz; + return sz; + } + +} + + + +class CharArrWriter extends CharArr { + protected Writer sink; + + @Override + public void flush() { + try { + sink.write(buf, start, end-start); + } catch (IOException e) { + throw new RuntimeException(e); + } + start = end = 0; + } + + @Override + public void write(char b) { + if (end >= buf.length) { + flush(); + } + unsafeWrite(b); + } + + @Override + public void write(char b[], int off, int len) { + int space = buf.length - end; + if (len < space) { + unsafeWrite(b, off, len); + } else if (len < buf.length) { + unsafeWrite(b, off, space); + flush(); + unsafeWrite(b, off+space, len-space); + } else { + flush(); + try { + sink.write(b, off, len); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public void write(String s, int stringOffset, int len) { + int space = buf.length - end; + if (len < space) { + s.getChars(stringOffset, stringOffset+len, buf, end); + end += len; + } else if (len < buf.length) { + // if the data to write is small enough, buffer it. 
+ s.getChars(stringOffset, stringOffset+space, buf, end); + flush(); + s.getChars(stringOffset+space, stringOffset+len, buf, 0); + end = len-space; + } else { + flush(); + // don't buffer, just write to sink + try { + sink.write(s, stringOffset, len); + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + } +} + Property changes on: solr/core/src/java/org/apache/solr/internal/noggit/CharArr.java ___________________________________________________________________ Added: svn:executable + * Index: solr/core/src/java/org/apache/solr/internal/noggit/JSONParser.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/noggit/JSONParser.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/noggit/JSONParser.java (revision 0) @@ -0,0 +1,838 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.internal.noggit; + +import java.io.IOException; +import java.io.Reader; + +/** + * @author yonik + * @version $Id: JSONParser.java 1099557 2011-05-04 18:54:26Z yonik $ + */ + +public class JSONParser { + + /** Event indicating a JSON string value, including member names of objects */ + public static final int STRING=1; + /** Event indicating a JSON number value which fits into a signed 64 bit integer */ + public static final int LONG=2; + /** Event indicating a JSON number value which has a fractional part or an exponent + * and with string length <= 23 chars not including sign. This covers + * all representations of normal values for Double.toString(). + */ + public static final int NUMBER=3; + /** Event indicating a JSON number value that was not produced by toString of any + * Java primitive numerics such as Double or Long. It is either + * an integer outside the range of a 64 bit signed integer, or a floating + * point value with a string representation of more than 23 chars. 
+ */ + public static final int BIGNUMBER=4; + /** Event indicating a JSON boolean */ + public static final int BOOLEAN=5; + /** Event indicating a JSON null */ + public static final int NULL=6; + /** Event indicating the start of a JSON object */ + public static final int OBJECT_START=7; + /** Event indicating the end of a JSON object */ + public static final int OBJECT_END=8; + /** Event indicating the start of a JSON array */ + public static final int ARRAY_START=9; + /** Event indicating the end of a JSON array */ + public static final int ARRAY_END=10; + /** Event indicating the end of input has been reached */ + public static final int EOF=11; + + public static class ParseException extends RuntimeException { + public ParseException(String msg) { + super(msg); + } + } + + public static String getEventString( int e ) + { + switch( e ) + { + case STRING: return "STRING"; + case LONG: return "LONG"; + case NUMBER: return "NUMBER"; + case BIGNUMBER: return "BIGNUMBER"; + case BOOLEAN: return "BOOLEAN"; + case NULL: return "NULL"; + case OBJECT_START: return "OBJECT_START"; + case OBJECT_END: return "OBJECT_END"; + case ARRAY_START: return "ARRAY_START"; + case ARRAY_END: return "ARRAY_END"; + case EOF: return "EOF"; + } + return "Unknown: "+e; + } + + private static final CharArr devNull = new NullCharArr(); + + + final char[] buf; // input buffer with JSON text in it + int start; // current position in the buffer + int end; // end position in the buffer (one past last valid index) + final Reader in; // optional reader to obtain data from + boolean eof=false; // true if the end of the stream was reached. + long gpos; // global position = gpos + start + + int event; // last event read + + public JSONParser(Reader in) { + this(in, new char[8192]); + // 8192 matches the default buffer size of a BufferedReader so double + // buffering of the data is avoided. 
+ } + + public JSONParser(Reader in, char[] buffer) { + this.in = in; + this.buf = buffer; + } + + // idea - if someone passes us a CharArrayReader, we could + // directly use that buffer as it's protected. + + public JSONParser(char[] data, int start, int end) { + this.in = null; + this.buf = data; + this.start = start; + this.end = end; + } + + public JSONParser(String data) { + this(data, 0, data.length()); + } + + public JSONParser(String data, int start, int end) { + this.in = null; + this.start = start; + this.end = end; + this.buf = new char[end-start]; + data.getChars(start,end,buf,0); + } + + // temporary output buffer + private final CharArr out = new CharArr(64); + + // We need to keep some state in order to (at a minimum) know if + // we should skip ',' or ':'. + private byte[] stack = new byte[16]; + private int ptr=0; // pointer into the stack of parser states + private byte state=0; // current parser state + + // parser states stored in the stack + private static final byte DID_OBJSTART =1; // '{' just read + private static final byte DID_ARRSTART =2; // '[' just read + private static final byte DID_ARRELEM =3; // array element just read + private static final byte DID_MEMNAME =4; // object member name (map key) just read + private static final byte DID_MEMVAL =5; // object member value (map val) just read + + // info about value that was just read (or is in the middle of being read) + private int valstate; + + // push current parser state (use at start of new container) + private final void push() { + if (ptr >= stack.length) { + // doubling here is probably overkill, but anything that needs to double more than + // once (32 levels deep) is very atypical anyway. 
+ byte[] newstack = new byte[stack.length<<1]; + System.arraycopy(stack,0,newstack,0,stack.length); + stack = newstack; + } + stack[ptr++] = state; + } + + // pop parser state (use at end of container) + private final void pop() { + if (--ptr<0) { + throw err("Unbalanced container"); + } else { + state = stack[ptr]; + } + } + + protected void fill() throws IOException { + if (in!=null) { + gpos += end; + start=0; + int num = in.read(buf,0,buf.length); + end = num>=0 ? num : 0; + } + if (start>=end) eof=true; + } + + private void getMore() throws IOException { + fill(); + if (start>=end) { + throw err(null); + } + } + + protected int getChar() throws IOException { + if (start>=end) { + fill(); + if (start>=end) return -1; + } + return buf[start++]; + } + + private int getCharNWS() throws IOException { + for (;;) { + int ch = getChar(); + if (!(ch==' ' || ch=='\t' || ch=='\n' || ch=='\r')) return ch; + } + } + + private void expect(char[] arr) throws IOException { + for (int i=1; i0) start--; // backup one char + String chs = "char=" + ((start>=end) ? "(EOF)" : "" + (char)buf[start]); + String pos = "position=" + (gpos+start); + String tot = chs + ',' + pos + getContext(); + if (msg==null) { + if (start>=end) msg = "Unexpected EOF"; + else msg="JSON Parse Error"; + } + return new ParseException(msg + ": " + tot); + } + + private String getContext() { + String context = ""; + if (start>=0) { + context += " BEFORE='" + errEscape(Math.max(start-60,0), start+1) + "'"; + } + if (start=b) return ""; + return new String(buf, a, b-a).replaceAll("\\s+"," "); + } + + + private boolean bool; // boolean value read + private long lval; // long value read + private int nstate; // current state while reading a number + private static final int HAS_FRACTION = 0x01; // nstate flag, '.' already read + private static final int HAS_EXPONENT = 0x02; // nstate flag, '[eE][+-]?[0-9]' already read + + /** Returns the long read... only significant if valstate==LONG after + * this call. 
firstChar should be the first numeric digit read. + */ + private long readNumber(int firstChar, boolean isNeg) throws IOException { + out.unsafeWrite(firstChar); // unsafe OK since we know output is big enough + // We build up the number in the negative plane since it's larger (by one) than + // the positive plane. + long v = '0' - firstChar; + // can't overflow a long in 18 decimal digits (i.e. 17 additional after the first). + // we also need 22 additional to handle double so we'll handle in 2 separate loops. + int i; + for (i=0; i<17; i++) { + int ch = getChar(); + // TODO: is this switch faster as an if-then-else? + switch(ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + v = v*10 - (ch-'0'); + out.unsafeWrite(ch); + continue; + case '.': + out.unsafeWrite('.'); + valstate = readFrac(out,22-i); + return 0; + case 'e': + case 'E': + out.unsafeWrite(ch); + nstate=0; + valstate = readExp(out,22-i); + return 0; + default: + // return the number, relying on nextEvent() to return an error + // for invalid chars following the number. + if (ch!=-1) --start; // push back last char if not EOF + + valstate = LONG; + return isNeg ? v : -v; + } + } + + // after this, we could overflow a long and need to do extra checking + boolean overflow = false; + long maxval = isNeg ? 
Long.MIN_VALUE : -Long.MAX_VALUE; + + for (; i<22; i++) { + int ch = getChar(); + switch(ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (v < (0x8000000000000000L/10)) overflow=true; // can't multiply by 10 w/o overflowing + v *= 10; + int digit = ch - '0'; + if (v < maxval + digit) overflow=true; // can't add digit w/o overflowing + v -= digit; + out.unsafeWrite(ch); + continue; + case '.': + out.unsafeWrite('.'); + valstate = readFrac(out,22-i); + return 0; + case 'e': + case 'E': + out.unsafeWrite(ch); + nstate=0; + valstate = readExp(out,22-i); + return 0; + default: + // return the number, relying on nextEvent() to return an error + // for invalid chars following the number. + if (ch!=-1) --start; // push back last char if not EOF + + valstate = overflow ? BIGNUMBER : LONG; + return isNeg ? v : -v; + } + } + + + nstate=0; + valstate = BIGNUMBER; + return 0; + } + + + // read digits right of decimal point + private int readFrac(CharArr arr, int lim) throws IOException { + nstate = HAS_FRACTION; // deliberate set instead of '|' + while(--lim>=0) { + int ch = getChar(); + if (ch>='0' && ch<='9') { + arr.write(ch); + } else if (ch=='e' || ch=='E') { + arr.write(ch); + return readExp(arr,lim); + } else { + if (ch!=-1) start--; // back up + return NUMBER; + } + } + return BIGNUMBER; + } + + + // call after 'e' or 'E' has been seen to read the rest of the exponent + private int readExp(CharArr arr, int lim) throws IOException { + nstate |= HAS_EXPONENT; + int ch = getChar(); lim--; + + if (ch=='+' || ch=='-') { + arr.write(ch); + ch = getChar(); lim--; + } + + // make sure at least one digit is read. 
+ if (ch<'0' || ch>'9') { + throw err("missing exponent number"); + } + arr.write(ch); + + return readExpDigits(arr,lim); + } + + // continuation of readExpStart + private int readExpDigits(CharArr arr, int lim) throws IOException { + while (--lim>=0) { + int ch = getChar(); + if (ch>='0' && ch<='9') { + arr.write(ch); + } else { + if (ch!=-1) start--; // back up + return NUMBER; + } + } + return BIGNUMBER; + } + + private void continueNumber(CharArr arr) throws IOException { + if (arr != out) arr.write(out); + + if ((nstate & HAS_EXPONENT)!=0){ + readExpDigits(arr, Integer.MAX_VALUE); + return; + } + if (nstate != 0) { + readFrac(arr, Integer.MAX_VALUE); + return; + } + + for(;;) { + int ch = getChar(); + if (ch>='0' && ch <='9') { + arr.write(ch); + } else if (ch=='.') { + arr.write(ch); + readFrac(arr,Integer.MAX_VALUE); + return; + } else if (ch=='e' || ch=='E') { + arr.write(ch); + readExp(arr,Integer.MAX_VALUE); + return; + } else { + if (ch!=-1) start--; + return; + } + } + } + + + private int hexval(int hexdig) { + if (hexdig>='0' && hexdig <='9') { + return hexdig-'0'; + } else if (hexdig>='A' && hexdig <='F') { + return hexdig+(10-'A'); + } else if (hexdig>='a' && hexdig <='f') { + return hexdig+(10-'a'); + } + throw err("invalid hex digit"); + } + + // backslash has already been read when this is called + private char readEscapedChar() throws IOException { + switch (getChar()) { + case '"' : return '"'; + case '\\' : return '\\'; + case '/' : return '/'; + case 'n' : return '\n'; + case 'r' : return '\r'; + case 't' : return '\t'; + case 'f' : return '\f'; + case 'b' : return '\b'; + case 'u' : + return (char)( + (hexval(getChar()) << 12) + | (hexval(getChar()) << 8) + | (hexval(getChar()) << 4) + | (hexval(getChar()))); + } + throw err("Invalid character escape in string"); + } + + // a dummy buffer we can use to point at other buffers + private final CharArr tmp = new CharArr(null,0,0); + + private CharArr readStringChars() throws IOException { + char 
c=0; + int i; + for (i=start; i=end) { + arr.write(buf,start,middle-start); + start=middle; + getMore(); + middle=start; + } + int ch = buf[middle++]; + if (ch=='"') { + int len = middle-start-1; + if (len>0) arr.write(buf,start,len); + start=middle; + return; + } else if (ch=='\\') { + int len = middle-start-1; + if (len>0) arr.write(buf,start,len); + start=middle; + arr.write(readEscapedChar()); + middle=start; + } + } + } + + + /*** alternate implelentation + // middle is the pointer to the middle of a buffer to start scanning for a non-string + // character ('"' or "/"). start<=middle=end) { + getMore(); + middle=start; + } else { + start = middle+1; // set buffer pointer to correct spot + if (ch=='"') { + valstate=0; + return; + } else if (ch=='\\') { + arr.write(readEscapedChar()); + if (start>=end) getMore(); + middle=start; + } + } + } + } + ***/ + + + // return the next event when parser is in a neutral state (no + // map separators or array element separators to read + private int next(int ch) throws IOException { + for(;;) { + switch (ch) { + case ' ': + case '\t': break; + case '\r': + case '\n': break; // try and keep track of linecounts? + case '"' : + valstate = STRING; + return STRING; + case '{' : + push(); + state= DID_OBJSTART; + return OBJECT_START; + case '[': + push(); + state=DID_ARRSTART; + return ARRAY_START; + case '0' : + out.reset(); + //special case '0'? If next char isn't '.' 
val=0 + ch=getChar(); + if (ch=='.') { + start--; ch='0'; + readNumber('0',false); + return valstate; + } else if (ch>'9' || ch<'0') { + out.unsafeWrite('0'); + if (ch!=-1) start--; + lval = 0; + valstate=LONG; + return LONG; + } else { + throw err("Leading zeros not allowed"); + } + case '1' : + case '2' : + case '3' : + case '4' : + case '5' : + case '6' : + case '7' : + case '8' : + case '9' : + out.reset(); + lval = readNumber(ch,false); + return valstate; + case '-' : + out.reset(); + out.unsafeWrite('-'); + ch = getChar(); + if (ch<'0' || ch>'9') throw err("expected digit after '-'"); + lval = readNumber(ch,true); + return valstate; + case 't': + valstate=BOOLEAN; + // TODO: test performance of this non-branching inline version. + // if ((('r'-getChar())|('u'-getChar())|('e'-getChar())) != 0) err(""); + expect(JSONUtil.TRUE_CHARS); + bool=true; + return BOOLEAN; + case 'f': + valstate=BOOLEAN; + expect(JSONUtil.FALSE_CHARS); + bool=false; + return BOOLEAN; + case 'n': + valstate=NULL; + expect(JSONUtil.NULL_CHARS); + return NULL; + case -1: + if (getLevel()>0) throw err("Premature EOF"); + return EOF; + default: throw err(null); + } + + ch = getChar(); + } + } + + public String toString() { + return "start="+start+",end="+end+",state="+state+"valstate="+valstate; + } + + + /** Returns the next event encountered in the JSON stream, one of + *
<ul>
+ * <li>{@link #STRING}</li>
+ * <li>{@link #LONG}</li>
+ * <li>{@link #NUMBER}</li>
+ * <li>{@link #BIGNUMBER}</li>
+ * <li>{@link #BOOLEAN}</li>
+ * <li>{@link #NULL}</li>
+ * <li>{@link #OBJECT_START}</li>
+ * <li>{@link #OBJECT_END}</li>
+ * <li>{@link #ARRAY_START}</li>
+ * <li>{@link #ARRAY_END}</li>
+ * <li>{@link #EOF}</li>
+ * </ul>
+ */ + public int nextEvent() throws IOException { + if (valstate==STRING) { + readStringChars2(devNull,start); + } + else if (valstate==BIGNUMBER) { + continueNumber(devNull); + } + + valstate=0; + + int ch; // TODO: factor out getCharNWS() to here and check speed + switch (state) { + case 0: + return event = next(getCharNWS()); + case DID_OBJSTART: + ch = getCharNWS(); + if (ch=='}') { + pop(); + return event = OBJECT_END; + } + if (ch != '"') { + throw err("Expected string"); + } + state = DID_MEMNAME; + valstate = STRING; + return event = STRING; + case DID_MEMNAME: + ch = getCharNWS(); + if (ch!=':') { + throw err("Expected key,value separator ':'"); + } + state = DID_MEMVAL; // set state first because it might be pushed... + return event = next(getChar()); + case DID_MEMVAL: + ch = getCharNWS(); + if (ch=='}') { + pop(); + return event = OBJECT_END; + } else if (ch!=',') { + throw err("Expected ',' or '}'"); + } + ch = getCharNWS(); + if (ch != '"') { + throw err("Expected string"); + } + state = DID_MEMNAME; + valstate = STRING; + return event = STRING; + case DID_ARRSTART: + ch = getCharNWS(); + if (ch==']') { + pop(); + return event = ARRAY_END; + } + state = DID_ARRELEM; // set state first, might be pushed... + return event = next(ch); + case DID_ARRELEM: + ch = getCharNWS(); + if (ch==']') { + pop(); + return event = ARRAY_END; + } else if (ch!=',') { + throw err("Expected ',' or ']'"); + } + // state = DID_ARRELEM; + return event = next(getChar()); + } + return 0; + } + + public int lastEvent() { + return event; + } + + public boolean wasKey() + { + return state == DID_MEMNAME; + } + + + private void goTo(int what) throws IOException { + if (valstate==what) { valstate=0; return; } + if (valstate==0) { + int ev = nextEvent(); // TODO + if (valstate!=what) { + throw err("type mismatch"); + } + valstate=0; + } + else { + throw err("type mismatch"); + } + } + + /** Returns the JSON string value, decoding any escaped characters. 
*/ + public String getString() throws IOException { + return getStringChars().toString(); + } + + /** Returns the characters of a JSON string value, decoding any escaped characters. + *

<p>The underlying buffer of the returned CharArr should *not* be + * modified as it may be shared with the input buffer. + *
<p>
The returned CharArr will only be valid up until + * the next JSONParser method is called. Any required data should be + * read before that point. + */ + public CharArr getStringChars() throws IOException { + goTo(STRING); + return readStringChars(); + } + + /** Reads a JSON string into the output, decoding any escaped characters. */ + public void getString(CharArr output) throws IOException { + goTo(STRING); + readStringChars2(output,start); + } + + /** Reads a number from the input stream and parses it as a long, only if + * the value will in fact fit into a signed 64 bit integer. */ + public long getLong() throws IOException { + goTo(LONG); + return lval; + } + + /** Reads a number from the input stream and parses it as a double */ + public double getDouble() throws IOException { + return Double.parseDouble(getNumberChars().toString()); + } + + /** Returns the characters of a JSON numeric value. + *

<p>The underlying buffer of the returned CharArr should *not* be + * modified as it may be shared with the input buffer. + *
<p>
The returned CharArr will only be valid up until + * the next JSONParser method is called. Any required data should be + * read before that point. + */ + public CharArr getNumberChars() throws IOException { + int ev=0; + if (valstate==0) ev = nextEvent(); + + if (valstate == LONG || valstate == NUMBER) { + valstate=0; + return out; + } + else if (valstate==BIGNUMBER) { + continueNumber(out); + valstate=0; + return out; + } else { + throw err("Unexpected " + ev); + } + } + + /** Reads a JSON numeric value into the output. */ + public void getNumberChars(CharArr output) throws IOException { + int ev=0; + if (valstate==0) ev=nextEvent(); + if (valstate == LONG || valstate == NUMBER) output.write(this.out); + else if (valstate==BIGNUMBER) { + continueNumber(output); + } else { + throw err("Unexpected " + ev); + } + valstate=0; + } + + /** Reads a boolean value */ + public boolean getBoolean() throws IOException { + goTo(BOOLEAN); + return bool; + } + + /** Reads a null value */ + public void getNull() throws IOException { + goTo(NULL); + } + + /** + * @return the current nesting level, the number of parent objects or arrays. + */ + public int getLevel() { + return ptr; + } + + public long getPosition() + { + return gpos+start; + } +} Property changes on: solr/core/src/java/org/apache/solr/internal/noggit/JSONParser.java ___________________________________________________________________ Added: svn:executable + * Index: solr/core/src/java/org/apache/solr/internal/noggit/JSONWriter.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/noggit/JSONWriter.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/noggit/JSONWriter.java (revision 0) @@ -0,0 +1,344 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.internal.noggit; + +import java.util.*; + +/** + * @author yonik + * @version $Id: JSONWriter.java 1211150 2011-12-06 21:10:01Z yonik $ + */ +public class JSONWriter { + + /** Implement this interface on your class to support serialization */ + public static interface Writable { + public void write(JSONWriter writer); + } + + protected int level; + protected int indent; + protected final CharArr out; + + /** + * @param out the CharArr to write the output to. + * @param indentSize The number of space characters to use as an indent (default 2). 0=newlines but no spaces, -1=no indent at all. 
+ */ + public JSONWriter(CharArr out, int indentSize) { + this.out = out; + this.indent = indentSize; + } + + public JSONWriter(CharArr out) { + this(out, 2); + } + + public void setIndentSize(int indentSize) { + this.indent = indentSize; + } + + public void indent() { + if (indent >= 0) { + out.write('\n'); + if (indent > 0) { + int spaces = level*indent; + out.reserve(spaces); + for (int i=0; i)val.entrySet()) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + if (sz>1) indent(); + writeString(entry.getKey().toString()); + writeNameSeparator(); + write(entry.getValue()); + } + endObject(); + } + + public void write(Collection val) { + startArray(); + int sz = val.size(); + boolean first = true; + for (Object o : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + if (sz>1) indent(); + write(o); + } + endArray(); + } + + /** A byte[] may be either a single logical value, or a list of small integers. + * It's up to the implementation to decide. 
+ */ + public void write(byte[] val) { + startArray(); + boolean first = true; + for (short v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + public void write(short[] val) { + startArray(); + boolean first = true; + for (short v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + public void write(int[] val) { + startArray(); + boolean first = true; + for (int v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + public void write(long[] val) { + startArray(); + boolean first = true; + for (long v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + public void write(float[] val) { + startArray(); + boolean first = true; + for (float v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + public void write(double[] val) { + startArray(); + boolean first = true; + for (double v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + public void write(boolean[] val) { + startArray(); + boolean first = true; + for (boolean v : val) { + if (first) { + first = false; + } else { + writeValueSeparator(); + } + write(v); + } + endArray(); + } + + + public void write(short number) { write ((int)number); } + public void write(byte number) { write((int)number); } + + + public void writeNull() { + JSONUtil.writeNull(out); + } + + public void writeString(CharSequence str) { + JSONUtil.writeString(str,0,str.length(),out); + } + + public void writeString(CharArr str) { + JSONUtil.writeString(str,out); + } + + public void writeStringStart() { + out.write('"'); + } + + public void writeStringChars(CharArr partialStr) { + JSONUtil.writeStringPart(partialStr.getArray(), 
partialStr.getStart(), partialStr.getEnd(), out); + } + + public void writeStringEnd() { + out.write('"'); + } + + public void write(long number) { + JSONUtil.writeNumber(number,out); + } + + public void write(int number) { + JSONUtil.writeNumber(number,out); + } + + public void write(double number) { + JSONUtil.writeNumber(number,out); + } + + public void write(float number) { + JSONUtil.writeNumber(number,out); + } + + public void write(boolean bool) { + JSONUtil.writeBoolean(bool,out); + } + + public void write(char[] val) { + JSONUtil.writeString(val, 0, val.length, out); + } + + public void writeNumber(CharArr digits) { + out.write(digits); + } + + public void writePartialNumber(CharArr digits) { + out.write(digits); + } + + public void startObject() { + out.write('{'); + level++; + } + + public void endObject() { + out.write('}'); + level--; + } + + public void startArray() { + out.write('['); + level++; + } + + public void endArray() { + out.write(']'); + level--; + } + + public void writeValueSeparator() { + out.write(','); + } + + public void writeNameSeparator() { + out.write(':'); + } + +} + Property changes on: solr/core/src/java/org/apache/solr/internal/noggit/JSONWriter.java ___________________________________________________________________ Added: svn:executable + * Index: solr/core/src/java/org/apache/solr/internal/noggit/JSONUtil.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/noggit/JSONUtil.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/noggit/JSONUtil.java (revision 0) @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.internal.noggit; + +/** + * @author yonik + * @version $Id: JSONUtil.java 1209632 2011-12-02 18:48:42Z yonik $ + */ + +public class JSONUtil { + public static final char[] TRUE_CHARS = new char[] {'t','r','u','e'}; + public static final char[] FALSE_CHARS = new char[] {'f','a','l','s','e'}; + public static final char[] NULL_CHARS = new char[] {'n','u','l','l'}; + public static final char[] HEX_CHARS = new char[] {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; + public static final char VALUE_SEPARATOR = ','; + public static final char NAME_SEPARATOR = ':'; + public static final char OBJECT_START = '{'; + public static final char OBJECT_END = '}'; + public static final char ARRAY_START = '['; + public static final char ARRAY_END = ']'; + + public static String toJSON(Object o) { + CharArr out = new CharArr(); + new JSONWriter(out).write(o); + return out.toString(); + } + + /** + * @param o The object to convert to JSON + * @param indentSize The number of space characters to use as an indent (default 2). 0=newlines but no spaces, -1=no indent at all. 
 + * @return the JSON text written for o + */ + public static String toJSON(Object o, int indentSize) { + CharArr out = new CharArr(); + new JSONWriter(out,indentSize).write(o); + return out.toString(); + } + + public static void writeNumber(int number, CharArr out) { + out.write(Integer.toString(number)); + } + + public static void writeNumber(long number, CharArr out) { + out.write(Long.toString(number)); + } + + public static void writeNumber(float number, CharArr out) { + out.write(Float.toString(number)); + } + + public static void writeNumber(double number, CharArr out) { + out.write(Double.toString(number)); + } + + public static void writeString(CharArr val, CharArr out) { + writeString(val.getArray(), val.getStart(), val.getEnd(), out); + } + + public static void writeString(char[] val, int start, int end, CharArr out) { + out.write('"'); + writeStringPart(val,start,end,out); + out.write('"'); + } + + public static void writeString(CharSequence val, int start, int end, CharArr out) { + out.write('"'); + writeStringPart(val,start,end,out); + out.write('"'); + } + + public static void writeStringPart(char[] val, int start, int end, CharArr out) { + for (int i=start; i>>12]); + out.write(HEX_CHARS[(ch>>>8)&0xf]); + out.write(HEX_CHARS[(ch>>>4)&0xf]); + out.write(HEX_CHARS[ch&0xf]); + } + + public static void writeNull(CharArr out) { + out.write(NULL_CHARS); + } + + public static void writeBoolean(boolean val, CharArr out) { + out.write(val ? 
TRUE_CHARS : FALSE_CHARS); + } + +} Property changes on: solr/core/src/java/org/apache/solr/internal/noggit/JSONUtil.java ___________________________________________________________________ Added: svn:executable + * Index: solr/core/src/java/org/apache/solr/internal/noggit/CharUtil.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/noggit/CharUtil.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/noggit/CharUtil.java (revision 0) @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.internal.noggit; + + +/** + * @author yonik + * @version $Id: CharUtil.java 479919 2006-11-28 05:53:55Z yonik $ + */ +public class CharUtil { + + // belongs in number utils or charutil? + public long parseLong(char[] arr, int start, int end) { + long x = 0; + boolean negative = arr[start] == '-'; + for (int i=negative ? 
start+1 : start; i=0) { + int c = a[a_start] - b[b_start]; + if (c!=0) return c; + a_start++; b_start++; + } + return a_len-b_len; + } + +} Property changes on: solr/core/src/java/org/apache/solr/internal/noggit/CharUtil.java ___________________________________________________________________ Added: svn:executable + * Index: solr/core/src/java/org/apache/solr/internal/noggit/ObjectBuilder.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/noggit/ObjectBuilder.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/noggit/ObjectBuilder.java (revision 0) @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.internal.noggit; + +import java.util.*; +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; + +/** + * @author yonik + * @version $Id$ + */ +public class ObjectBuilder { + + public static Object fromJSON(String json) throws IOException { + JSONParser p = new JSONParser(json); + return getVal(p); + } + + public static Object getVal(JSONParser parser) throws IOException { + return new ObjectBuilder(parser).getVal(); + } + + final JSONParser parser; + + public ObjectBuilder(JSONParser parser) throws IOException { + this.parser = parser; + if (parser.lastEvent()==0) parser.nextEvent(); + } + + + public Object getVal() throws IOException { + int ev = parser.lastEvent(); + switch(ev) { + case JSONParser.STRING: return getString(); + case JSONParser.LONG: return getLong(); + case JSONParser.NUMBER: return getNumber(); + case JSONParser.BIGNUMBER: return getBigNumber(); + case JSONParser.BOOLEAN: return getBoolean(); + case JSONParser.NULL: return getNull(); + case JSONParser.OBJECT_START: return getObject(); + case JSONParser.OBJECT_END: return null; // OR ERROR? + case JSONParser.ARRAY_START: return getArray(); + case JSONParser.ARRAY_END: return null; // OR ERROR? + case JSONParser.EOF: return null; // OR ERROR? + default: return null; // OR ERROR? 
+ } + } + + + public Object getString() throws IOException { + return parser.getString(); + } + + public Object getLong() throws IOException { + return Long.valueOf(parser.getLong()); + } + + public Object getNumber() throws IOException { + CharArr num = parser.getNumberChars(); + String numstr = num.toString(); + double d = Double.parseDouble(numstr); + if (!Double.isInfinite(d)) return Double.valueOf(d); + // TODO: use more efficient constructor in Java5 + return new BigDecimal(numstr); + } + + public Object getBigNumber() throws IOException { + CharArr num = parser.getNumberChars(); + String numstr = num.toString(); + for(int ch; (ch=num.read())!=-1;) { + if (ch=='.' || ch=='e' || ch=='E') return new BigDecimal(numstr); + } + return new BigInteger(numstr); + } + + public Object getBoolean() throws IOException { + return parser.getBoolean(); + } + + public Object getNull() throws IOException { + parser.getNull(); + return null; + } + + public Object newObject() throws IOException { + return new LinkedHashMap(); + } + + public Object getKey() throws IOException { + return parser.getString(); + } + + public void addKeyVal(Object map, Object key, Object val) throws IOException { + Object prev = ((Map)map).put(key,val); + // TODO: test for repeated value? 
+ } + + public Object objectEnd(Object obj) { + return obj; + } + + + public Object getObject() throws IOException { + Object m = newObject(); + for(;;) { + int ev = parser.nextEvent(); + if (ev==JSONParser.OBJECT_END) return objectEnd(m); + Object key = getKey(); + ev = parser.nextEvent(); + Object val = getVal(); + addKeyVal(m, key, val); + } + } + + public Object newArray() { + return new ArrayList(); + } + + public void addArrayVal(Object arr, Object val) throws IOException { + ((List)arr).add(val); + } + + public Object endArray(Object arr) { + return arr; + } + + public Object getArray() throws IOException { + Object arr = newArray(); + for(;;) { + int ev = parser.nextEvent(); + if (ev==JSONParser.ARRAY_END) return endArray(arr); + Object val = getVal(); + addArrayVal(arr, val); + } + } + +} Index: solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java (revision 0) @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 + */ +package org.apache.solr.internal.csv; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.Writer; + +/** + * Print values as a comma separated list. + */ +public class CSVPrinter { + + /** The place that the values get written. */ + protected final Writer out; + protected final CSVStrategy strategy; + + /** True if we just began a new line. */ + protected boolean newLine = true; + + protected char[] buf = new char[0]; // temporary buffer + + /** + * Create a printer that will print values to the given + * stream following the CSVStrategy. + * + * Currently, only a pure encapsulation strategy or a pure escaping strategy + * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported. + * + * @param out stream to which to print. + * @param strategy describes the CSV variation. + */ + public CSVPrinter(Writer out, CSVStrategy strategy) { + this.out = out; + this.strategy = strategy==null ? CSVStrategy.DEFAULT_STRATEGY : strategy; + } + + // ====================================================== + // printing implementation + // ====================================================== + + /** + * Output a blank line + */ + public void println() throws IOException { + out.write(strategy.getPrinterNewline()); + newLine = true; + } + + public void flush() throws IOException { + out.flush(); + } + + + /** + * Print a single line of comma separated values. + * The values will be quoted if needed. Quotes and + * newLine characters will be escaped. + * + * @param values values to be outputted. + */ + public void println(String[] values) throws IOException { + for (int i = 0; i < values.length; i++) { + print(values[i]); + } + println(); + } + + + /** + * Put a comment among the comma separated values. + * Comments will always begin on a new line and occupy at + * least one full line.
The character specified to start + * comments and a space will be inserted at the beginning of + * each new line in the comment. + * + * @param comment the comment to output + */ + public void printlnComment(String comment) throws IOException { + if(this.strategy.isCommentingDisabled()) { + return; + } + if (!newLine) { + println(); + } + out.write(this.strategy.getCommentStart()); + out.write(' '); + for (int i = 0; i < comment.length(); i++) { + char c = comment.charAt(i); + switch (c) { + case '\r' : + if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') { + i++; + } + // break intentionally excluded. + case '\n' : + println(); + out.write(this.strategy.getCommentStart()); + out.write(' '); + break; + default : + out.write(c); + break; + } + } + println(); + } + + + public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException { + if (!checkForEscape) { + printSep(); + out.write(value, offset, len); + return; + } + + if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) { + printAndEncapsulate(value, offset, len); + } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) { + printAndEscape(value, offset, len); + } else { + printSep(); + out.write(value, offset, len); + } + } + + void printSep() throws IOException { + if (newLine) { + newLine = false; + } else { + out.write(this.strategy.getDelimiter()); + } + } + + void printAndEscape(char[] value, int offset, int len) throws IOException { + int start = offset; + int pos = offset; + int end = offset + len; + + printSep(); + + char delim = this.strategy.getDelimiter(); + char escape = this.strategy.getEscape(); + + while (pos < end) { + char c = value[pos]; + if (c == '\r' || c=='\n' || c==delim || c==escape) { + // write out segment up until this char + int l = pos-start; + if (l>0) { + out.write(value, start, l); + } + if (c=='\n') c='n'; + else if (c=='\r') c='r'; + + out.write(escape); + out.write(c); + + start = pos+1; // start on the current 
char after this one + } + + pos++; + } + + // write last segment + int l = pos-start; + if (l>0) { + out.write(value, start, l); + } + } + + void printAndEncapsulate(char[] value, int offset, int len) throws IOException { + boolean first = newLine; // is this the first value on this line? + boolean quote = false; + int start = offset; + int pos = offset; + int end = offset + len; + + printSep(); + + char delim = this.strategy.getDelimiter(); + char encapsulator = this.strategy.getEncapsulator(); + + if (len <= 0) { + // always quote an empty token that is the first + // on the line, as it may be the only thing on the + // line. If it were not quoted in that case, + // an empty line has no tokens. + if (first) { + quote = true; + } + } else { + char c = value[pos]; + + // Hmmm, where did this rule come from? + if (first + && (c < '0' + || (c > '9' && c < 'A') + || (c > 'Z' && c < 'a') + || (c > 'z'))) { + quote = true; + // } else if (c == ' ' || c == '\f' || c == '\t') { + } else if (c <= '#') { + // Some other chars at the start of a value caused the parser to fail, so for now + // encapsulate if we start in anything less than '#'. We are being conservative + // by including the default comment char too. + quote = true; + } else { + while (pos < end) { + c = value[pos]; + if (c=='\n' || c=='\r' || c==encapsulator || c==delim) { + quote = true; + break; + } + pos++; + } + + if (!quote) { + pos = end-1; + c = value[pos]; + // if (c == ' ' || c == '\f' || c == '\t') { + // Some other chars at the end caused the parser to fail, so for now + // encapsulate if we end in anything less than ' ' + if (c <= ' ') { + quote = true; + } + } + } + } + + if (!quote) { + // no encapsulation needed - write out the original value + out.write(value, offset, len); + return; + } + + // we hit something that needed encapsulation + out.write(encapsulator); + + // Pick up where we left off: pos should be positioned on the first character that caused + // the need for encapsulation. 
+ while (pos 0 && ready()) { + if (lookaheadChar == -1) { + // eof stream reached, do not continue + return cOff - off; + } else { + buf[cOff++] = (char) lookaheadChar; + if (lookaheadChar == '\n') { + lineCounter++; + } + lastChar = lookaheadChar; + lookaheadChar = super.read(); + len--; + } + } + return cOff - off; + } + + /** + * Reads all characters up to (but not including) the given character. + * + * @param c the character to read up to + * @return the string up to the character c + * @throws IOException + */ + public String readUntil(char c) throws IOException { + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + line.clear(); // reuse + while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) { + line.append((char) lookaheadChar); + if (lookaheadChar == '\n') { + lineCounter++; + } + lastChar = lookaheadChar; + lookaheadChar = super.read(); + } + return line.toString(); + } + + /** + * @return A String containing the contents of the line, not + * including any line-termination characters, or null + * if the end of the stream has been reached + */ + public String readLine() throws IOException { + + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + + line.clear(); //reuse + + // return null if end of stream has been reached + if (lookaheadChar == END_OF_STREAM) { + return null; + } + // do we have a line termination already + char laChar = (char) lookaheadChar; + if (laChar == '\n' || laChar == '\r') { + lastChar = lookaheadChar; + lookaheadChar = super.read(); + // ignore '\r\n' as well + if ((char) lookaheadChar == '\n') { + lastChar = lookaheadChar; + lookaheadChar = super.read(); + } + lineCounter++; + return line.toString(); + } + + // create the rest-of-line return and update the lookahead + line.append(laChar); + String restOfLine = super.readLine(); // TODO involves copying + lastChar = lookaheadChar; + lookaheadChar = super.read(); + if (restOfLine != null) { + line.append(restOfLine); + } + 
lineCounter++; + return line.toString(); + } + + /** + * Skips char in the stream + * + * ATTENTION: invalidates the line-counter !!!!! + * + * @return nof skiped chars + */ + public long skip(long n) throws IllegalArgumentException, IOException { + + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + + // illegal argument + if (n < 0) { + throw new IllegalArgumentException("negative argument not supported"); + } + + // no skipping + if (n == 0 || lookaheadChar == END_OF_STREAM) { + return 0; + } + + // skip and reread the lookahead-char + long skiped = 0; + if (n > 1) { + skiped = super.skip(n - 1); + } + lookaheadChar = super.read(); + // fixme uh: we should check the skiped sequence for line-terminations... + lineCounter = Integer.MIN_VALUE; + return skiped + 1; + } + + /** + * Skips all chars in the input until (but excluding) the given char + * + * @param c + * @return + * @throws IllegalArgumentException + * @throws IOException + */ + public long skipUntil(char c) throws IllegalArgumentException, IOException { + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + long counter = 0; + while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) { + if (lookaheadChar == '\n') { + lineCounter++; + } + lookaheadChar = super.read(); + counter++; + } + return counter; + } + + /** + * Returns the next char in the stream without consuming it. + * + * Remember the next char read by read(..) will always be + * identical to lookAhead(). 
+   *
+   * @return the next char (without consuming it) or END_OF_STREAM
+   */
+  public int lookAhead() throws IOException {
+    if (lookaheadChar == UNDEFINED) {
+      lookaheadChar = super.read();
+    }
+    return lookaheadChar;
+  }
+
+
+  /**
+   * Returns the number of lines read so far.
+   * ATTENTION: the skip-method does invalidate the line-number counter
+   *
+   * @return the current line number, or -1 while the counter is negative (invalidated)
+   */
+  public int getLineNumber() {
+    if (lineCounter > -1) {
+      return lineCounter;
+    } else {
+      return -1;
+    }
+  }
+  public boolean markSupported() {
+    /* note uh: marking is not supported, because we cannot
+     * see into the future...
+     */
+    return false;
+  }
+
+}
Index: solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java
===================================================================
--- solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java (revision 0)
+++ solr/core/src/java/org/apache/solr/internal/csv/CharBuffer.java (revision 0)
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.solr.internal.csv;
+
+/**
+ * A simple StringBuffer replacement that aims to
+ * reduce copying as much as possible. The buffer
+ * grows as necessary.
+ * This class is not thread safe.
+ *
+ * @author Ortwin Glück
+ */
+public class CharBuffer {
+
+    private char[] c;
+
+    /**
+     * Actually used number of characters in the array.
+     * It is also the index at which
+     * a new character will be inserted into c.
+     */
+    private int length;
+
+    /**
+     * Creates a new CharBuffer with an initial capacity of 32 characters.
+     */
+    public CharBuffer() {
+        this(32);
+    }
+
+    /**
+     * Creates a new CharBuffer with an initial capacity
+     * of length characters; zero and negative values are rejected.
+     */
+    public CharBuffer(final int length) {
+        if (length <= 0) {
+            throw new IllegalArgumentException("Can't create an empty CharBuffer");
+        }
+        this.c = new char[length];
+    }
+
+    /**
+     * Empties the buffer. The capacity still remains the same, so no memory is freed.
+     */
+    public void clear() {
+        length = 0;
+    }
+
+    /**
+     * Returns the number of characters in the buffer.
+     * @return the number of characters
+     */
+    public int length() {
+        return length;
+    }
+
+    /**
+     * Returns the current capacity of the buffer.
+     * @return the maximum number of characters that can be stored in this buffer without
+     * resizing it.
+     */
+    public int capacity() {
+        return c.length;
+    }
+
+
+    /**
+     * Appends the contents of cb to the end of this CharBuffer.
+     * @param cb the CharBuffer to append or null
+     */
+    public void append(final CharBuffer cb) {
+        if (cb == null) {
+            return;
+        }
+        provideCapacity(length + cb.length);
+        System.arraycopy(cb.c, 0, c, length, cb.length);
+        length += cb.length;
+    }
+
+    /**
+     * Appends s to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param s the String to append or null
+     */
+    public void append(final String s) {
+        final int n = (s == null) ? 0 : s.length();
+        provideCapacity(length + n);
+        if (s != null) { s.getChars(0, n, c, length); } // straight into c: one copy, no temporary array
+        length += n;
+    }
+
+    /**
+     * Appends sb to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param sb the StringBuffer to append or null
+     */
+    public void append(final StringBuffer sb) {
+        if (sb == null) {
+            return;
+        }
+        provideCapacity(length + sb.length());
+        sb.getChars(0, sb.length(), c, length);
+        length += sb.length();
+    }
+
+    /**
+     * Appends data to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param data the char[] to append or null
+     */
+    public void append(final char[] data) {
+        if (data == null) {
+            return;
+        }
+        provideCapacity(length + data.length);
+        System.arraycopy(data, 0, c, length, data.length);
+        length += data.length;
+    }
+
+    /**
+     * Appends a single character to the end of this CharBuffer.
+     * This method involves copying the new data once!
+     * @param data the char to append
+     */
+    public void append(final char data) {
+        provideCapacity(length + 1);
+        c[length] = data;
+        length++;
+    }
+
+    /**
+     * Shrinks the capacity of the buffer to the current length if necessary.
+     * This method involves copying the data once!
+     */
+    public void shrink() {
+        if (c.length == length) {
+            return;
+        }
+        char[] newc = new char[length];
+        System.arraycopy(c, 0, newc, 0, length);
+        c = newc;
+    }
+
+    /**
+     * Removes trailing whitespace.
+     */
+    public void trimTrailingWhitespace() {
+        while (length>0 && Character.isWhitespace(c[length-1])) {
+            length--;
+        }
+    }
+
+    /**
+     * Returns the contents of the buffer as a char[]. The returned array may
+     * be the internal array of the buffer, so the caller must take care when
+     * modifying it.
+     * This method allows to avoid copying if the caller knows the exact capacity
+     * before.
+     * @return the buffer contents; the internal array itself when capacity equals length
+     */
+    public char[] getCharacters() {
+        if (c.length == length) {
+            return c;
+        }
+        char[] chars = new char[length];
+        System.arraycopy(c, 0, chars, 0, length);
+        return chars;
+    }
+
+    /**
+     * Returns the character at the specified position (pos is not checked against length()).
+     */
+    public char charAt(int pos) {
+        return c[pos];
+    }
+
+    /**
+     * Converts the contents of the buffer into a StringBuffer.
+     * This method involves copying the new data once!
+     * @return a new StringBuffer holding the buffer contents
+     */
+    public StringBuffer toStringBuffer() {
+        StringBuffer sb = new StringBuffer(length);
+        sb.append(c, 0, length);
+        return sb;
+    }
+
+    /**
+     * Converts the contents of the buffer into a String.
+     * This method involves copying the new data once!
+     * @return the buffer contents as a String
+     */
+    public String toString() {
+        return new String(c, 0, length);
+    }
+
+    /**
+     * Copies the data into a new array of at least capacity size.
+     * @param capacity the minimum capacity required; growth is about 1.5x, computed in long to avoid int overflow
+     */
+    public void provideCapacity(final int capacity) {
+        if (c.length >= capacity) {
+            return;
+        }
+        int newcapacity = (int) Math.min(((capacity * 3L) >> 1) + 1, Integer.MAX_VALUE);
+        char[] newc = new char[newcapacity];
+        System.arraycopy(c, 0, newc, 0, length);
+        c = newc;
+    }
+}
Index: solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
===================================================================
--- solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java (revision 0)
+++ solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java (revision 0)
@@ -0,0 +1,605 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.solr.internal.csv; + +import java.io.IOException; +import java.io.Reader; +import java.io.InputStreamReader; +import java.io.InputStream; +import java.util.ArrayList; + + +/** + * Parses CSV files according to the specified configuration. + * + * Because CSV appears in many different dialects, the parser supports many + * configuration settings by allowing the specification of a {@link CSVStrategy}. + * + *

+ * <p>Parsing of a csv-string having tabs as separators,
+ * '"' as an optional value encapsulator, and comments starting with '#':</p>
+ * <pre>
+ *  String[][] data =
+ *   (new CSVParser(new StringReader("a\tb\nc\td"), new CSVStrategy('\t','"','#'))).getAllValues();
+ * </pre>
+ *
+ * <p>Parsing of a csv-string in Excel CSV format</p>
+ * <pre>
+ *  String[][] data =
+ *   (new CSVParser(new StringReader("a;b\nc;d"), CSVStrategy.EXCEL_STRATEGY)).getAllValues();
+ * </pre>
+ *
+ * <p>
+ * Internal parser state is completely covered by the strategy
+ * and the reader-state.</p>
+ *
+ * <p>see package documentation
+ * for more details</p>

+ */
+public class CSVParser {
+
+  /** length of the initial token (content-)buffer */
+  private static final int INITIAL_TOKEN_LENGTH = 50;
+
+  // the token types
+  /** Token has no valid content, i.e. is in its initialized state. */
+  protected static final int TT_INVALID = -1;
+  /** Token with content, at beginning or in the middle of a line. */
+  protected static final int TT_TOKEN = 0;
+  /** Token (which can have content) when end of file is reached. */
+  protected static final int TT_EOF = 1;
+  /** Token with content when end of a line is reached. */
+  protected static final int TT_EORECORD = 2;
+
+  /** Immutable empty String array. */
+  private static final String[] EMPTY_STRING_ARRAY = new String[0];
+
+  // the input stream (an ExtendedBufferedReader, used for its lookAhead()/readAgain() calls)
+  private final ExtendedBufferedReader in;
+
+  private final CSVStrategy strategy;
+
+  // the following objects are shared to reduce garbage
+  /** A record buffer for getLine(). Grows as necessary and is reused. */
+  private final ArrayList record = new ArrayList();
+  private final Token reusableToken = new Token(); // recycled by getLine() for every token
+  private final CharBuffer wsBuf = new CharBuffer(); // whitespace skipped ahead of a token in nextToken()
+  private final CharBuffer code = new CharBuffer(4); // the four hex digits read by unicodeEscapeLexer()
+
+
+  /**
+   * Token is an internal token representation.
+   *
+   * It is used as contract between the lexer and the parser.
+   */
+  static class Token {
+    /** Token type, see TT_xxx constants. */
+    int type = TT_INVALID;
+    /** The content buffer. */
+    CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
+    /** Token ready flag: indicates a valid token with content (ready for the parser). */
+    boolean isReady;
+
+    Token reset() {
+      content.clear();
+      type = TT_INVALID;
+      isReady = false;
+      return this;
+    }
+  }
+
+  // ======================================================
+  // the constructor
+  // ======================================================
+
+  /**
+   * Default strategy for the parser follows the default {@link CSVStrategy}.
+   *
+   * @param input an InputStream containing "csv-formatted" stream (decoded with the platform default charset)
+   * @deprecated use {@link #CSVParser(Reader)}.
+   */
+  public CSVParser(InputStream input) {
+    this(new InputStreamReader(input));
+  }
+
+  /**
+   * CSV parser using the default {@link CSVStrategy}.
+   *
+   * @param input a Reader containing "csv-formatted" input
+   */
+  public CSVParser(Reader input) {
+    // note: must match default-CSV-strategy !!
+    this(input, ',');
+  }
+
+  /**
+   * Customized value delimiter parser.
+   *
+   * The parser follows the default {@link CSVStrategy}
+   * except for the delimiter setting.
+   *
+   * @param input a Reader based on "csv-formatted" input
+   * @param delimiter a Char used for value separation
+   * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
+   */
+  public CSVParser(Reader input, char delimiter) {
+    this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED);
+  }
+
+  /**
+   * Customized csv parser.
+   *
+   * The parser parses according to the given CSV dialect settings.
+   * Leading whitespaces are truncated, unicode escapes are
+   * not interpreted and empty lines are ignored.
+   *
+   * @param input a Reader based on "csv-formatted" input
+   * @param delimiter a Char used for value separation
+   * @param encapsulator a Char used as value encapsulation marker
+   * @param commentStart a Char used for comment identification
+   * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
+   */
+  public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) {
+    this(input, new CSVStrategy(delimiter, encapsulator, commentStart));
+  }
+
+  /**
+   * Customized CSV parser using the given {@link CSVStrategy}
+   *
+   * @param input a Reader containing "csv-formatted" input (wrapped in an ExtendedBufferedReader)
+   * @param strategy the CSVStrategy used for CSV parsing
+   */
+  public CSVParser(Reader input, CSVStrategy strategy) {
+    this.in = new ExtendedBufferedReader(input);
+    this.strategy = strategy;
+  }
+
+  // ======================================================
+  // the parser
+  // ======================================================
+
+  /**
+   * Parses the CSV according to the given strategy
+   * and returns the content as an array of records
+   * (whereas records are arrays of single values).
+   *

+   * The returned content starts at the current parse-position in
+   * the stream.
+   *
+   * @return matrix of records x values ('null' when end of file)
+   * @throws IOException on parse error or input read-failure
+   */
+  public String[][] getAllValues() throws IOException {
+    ArrayList records = new ArrayList();
+    String[] values;
+    String[][] ret = null;
+    while ((values = getLine()) != null) {
+      records.add(values);
+    }
+    if (records.size() > 0) {
+      ret = new String[records.size()][];
+      records.toArray(ret);
+    }
+    return ret;
+  }
+
+  /**
+   * Parses the CSV according to the given strategy
+   * and returns the next csv-value as string.
+   *
+   * @return next value in the input stream ('null' when end of file and no value is pending)
+   * @throws IOException on parse error or input read-failure
+   */
+  public String nextValue() throws IOException {
+    Token tkn = nextToken();
+    String ret = null;
+    switch (tkn.type) {
+      case TT_TOKEN:
+      case TT_EORECORD:
+        ret = tkn.content.toString();
+        break;
+      case TT_EOF:
+        ret = tkn.isReady ? tkn.content.toString() : null; // a ready EOF token still carries the last value
+        break;
+      case TT_INVALID:
+      default:
+        // error no token available (or error)
+        throw new IOException(
+          "(line " + getLineNumber()
+          + ") invalid parse sequence");
+        // unreachable: break;
+    }
+    return ret;
+  }
+
+  /**
+   * Parses from the current point in the stream til
+   * the end of the current line.
+   *
+   * @return array of values til end of line
+   *    ('null' when end of file has been reached)
+   * @throws IOException on parse error or input read-failure
+   */
+  public String[] getLine() throws IOException {
+    String[] ret = EMPTY_STRING_ARRAY;
+    record.clear();
+    while (true) {
+      reusableToken.reset();
+      nextToken(reusableToken);
+      switch (reusableToken.type) {
+        case TT_TOKEN:
+          record.add(reusableToken.content.toString());
+          break;
+        case TT_EORECORD:
+          record.add(reusableToken.content.toString());
+          break;
+        case TT_EOF:
+          if (reusableToken.isReady) {
+            record.add(reusableToken.content.toString());
+          } else {
+            ret = null;
+          }
+          break;
+        case TT_INVALID:
+        default:
+          // error: throw IOException
+          throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
+          // unreachable: break;
+      }
+      if (reusableToken.type != TT_TOKEN) {
+        break;
+      }
+    }
+    if (!record.isEmpty()) {
+      ret = (String[]) record.toArray(new String[record.size()]);
+    }
+    return ret;
+  }
+
+  /**
+   * Returns the current line number in the input stream.
+   *
+   * ATTENTION: in case your csv has multiline-values the returned
+   *            number does not correspond to the record-number
+   *
+   * @return current line number
+   */
+  public int getLineNumber() {
+    return in.getLineNumber();
+  }
+
+  // ======================================================
+  // the lexer(s)
+  // ======================================================
+
+  /**
+   * Convenience method: lexes the next token into a freshly allocated Token.
+   */
+  protected Token nextToken() throws IOException {
+    return nextToken(new Token());
+  }
+
+  /**
+   * Returns the next token.
+   *
+   * A token corresponds to a term, a record change or an
+   * end-of-file indicator.
+   *
+   * @param tkn an existing Token object to reuse. The caller is responsible to initialize the
+   * Token.
+   * @return the next token found
+   * @throws IOException on stream access error
+   */
+  protected Token nextToken(Token tkn) throws IOException {
+    wsBuf.clear(); // reuse
+
+    // get the last read char (required for empty line detection)
+    int lastChar = in.readAgain();
+
+    // read the next char and set eol
+    /* note: unfortunately isEndOfLine may consume a character silently.
+     *       this has no effect outside of the method. so a simple workaround
+     *       is to call 'readAgain' on the stream...
+     *       uh: might using objects instead of base-types (jdk1.5 autoboxing!)
+     */
+    int c = in.read();
+    boolean eol = isEndOfLine(c);
+    c = in.readAgain();
+
+    // empty line detection: eol AND (last char was EOL or beginning)
+    while (strategy.getIgnoreEmptyLines() && eol
+      && (lastChar == '\n'
+      || lastChar == ExtendedBufferedReader.UNDEFINED)
+      && !isEndOfFile(lastChar)) {
+      // go on char ahead ...
+      lastChar = c;
+      c = in.read();
+      eol = isEndOfLine(c);
+      c = in.readAgain();
+      // reached end of file without any content (empty line at the end)
+      if (isEndOfFile(c)) {
+        tkn.type = TT_EOF;
+        return tkn;
+      }
+    }
+
+    // did we reach eof during the last iteration already ? TT_EOF
+    if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
+      tkn.type = TT_EOF;
+      return tkn;
+    }
+
+    // important: make sure a new char gets consumed in each iteration
+    while (!tkn.isReady && tkn.type != TT_EOF) {
+      // ignore whitespaces at beginning of a token
+      while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
+        wsBuf.append((char) c);
+        c = in.read();
+        eol = isEndOfLine(c);
+      }
+      // ok, start of token reached: comment, encapsulated, or token
+      if (c == strategy.getCommentStart()) {
+        // ignore everything till end of line and continue (incr linecount)
+        in.readLine();
+        tkn = nextToken(tkn.reset());
+      } else if (c == strategy.getDelimiter()) {
+        // empty token return TT_TOKEN("")
+        tkn.type = TT_TOKEN;
+        tkn.isReady = true;
+      } else if (eol) {
+        // empty token return TT_EORECORD("")
+        //noop: tkn.content.append("");
+        tkn.type = TT_EORECORD;
+        tkn.isReady = true;
+      } else if (c == strategy.getEncapsulator()) {
+        // consume encapsulated token
+        encapsulatedTokenLexer(tkn, c);
+      } else if (isEndOfFile(c)) {
+        // end of file return TT_EOF()
+        //noop: tkn.content.append("");
+        tkn.type = TT_EOF;
+        tkn.isReady = true;
+      } else {
+        // next token must be a simple token
+        // add removed blanks when not ignoring whitespace chars...
+        if (!strategy.getIgnoreLeadingWhitespaces()) {
+          tkn.content.append(wsBuf);
+        }
+        simpleTokenLexer(tkn, c);
+      }
+    }
+    return tkn;
+  }
+
+  /**
+   * A simple token lexer
+   *
+   * Simple token are tokens which are not surrounded by encapsulators.
+   * A simple token might contain escaped delimiters (as \, or \;). The
+   * token is finished when one of the following conditions become true:
+   *

+   * <ul>
+   *   <li>end of line has been reached (TT_EORECORD)</li>
+   *   <li>end of stream has been reached (TT_EOF)</li>
+   *   <li>an unescaped delimiter has been reached (TT_TOKEN)</li>
+   * </ul>
+   *
+   * @param tkn  the current token
+   * @param c    the current character
+   * @return the filled token (the same instance as tkn)
+   *
+   * @throws IOException on stream access error
+   */
+  private Token simpleTokenLexer(Token tkn, int c) throws IOException {
+    for (;;) {
+      if (isEndOfLine(c)) {
+        // end of record
+        tkn.type = TT_EORECORD;
+        tkn.isReady = true;
+        break;
+      } else if (isEndOfFile(c)) {
+        // end of file
+        tkn.type = TT_EOF;
+        tkn.isReady = true;
+        break;
+      } else if (c == strategy.getDelimiter()) {
+        // end of token
+        tkn.type = TT_TOKEN;
+        tkn.isReady = true;
+        break;
+      } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
+        // interpret unicode escaped chars (like \u0070 -> p)
+        tkn.content.append((char) unicodeEscapeLexer(c));
+      } else if (c == strategy.getEscape()) {
+        tkn.content.append((char)readEscape(c));
+      } else {
+        tkn.content.append((char) c);
+      }
+
+      c = in.read();
+    }
+
+    if (strategy.getIgnoreTrailingWhitespaces()) {
+      tkn.content.trimTrailingWhitespace();
+    }
+
+    return tkn;
+  }
+
+
+  /**
+   * An encapsulated token lexer
+   *
+   * Encapsulated tokens are surrounded by the given encapsulating-string.
+   * The encapsulator itself might be included in the token using a
+   * doubling syntax (as "", '') or using escaping (as in \", \').
+   * Whitespaces before and after an encapsulated token are ignored.
+   *
+   * @param tkn    the current token
+   * @param c      the current character
+   * @return a valid token object
+   * @throws IOException on invalid state
+   */
+  private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
+    // save current line
+    int startLineNumber = getLineNumber();
+    // ignore the opening encapsulator passed in as c
+    // assert c == encapsulator;
+    for (;;) {
+      c = in.read();
+
+      if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') {
+        tkn.content.append((char) unicodeEscapeLexer(c));
+      } else if (c == strategy.getEscape()) {
+        tkn.content.append((char)readEscape(c));
+      } else if (c == strategy.getEncapsulator()) {
+        if (in.lookAhead() == strategy.getEncapsulator()) {
+          // double or escaped encapsulator -> add single encapsulator to token
+          c = in.read();
+          tkn.content.append((char) c);
+        } else {
+          // token finish mark (encapsulator) reached: ignore whitespace till delimiter
+          for (;;) {
+            c = in.read();
+            if (c == strategy.getDelimiter()) {
+              tkn.type = TT_TOKEN;
+              tkn.isReady = true;
+              return tkn;
+            } else if (isEndOfFile(c)) {
+              tkn.type = TT_EOF;
+              tkn.isReady = true;
+              return tkn;
+            } else if (isEndOfLine(c)) {
+              // ok, end of token reached
+              tkn.type = TT_EORECORD;
+              tkn.isReady = true;
+              return tkn;
+            } else if (!isWhitespace(c)) {
+              // error invalid char between token and next delimiter
+              throw new IOException(
+                "(line " + getLineNumber()
+                + ") invalid char between encapsulated token end delimiter"
+              );
+            }
+          }
+        }
+      } else if (isEndOfFile(c)) {
+        // error condition (end of file before end of token)
+        throw new IOException(
+          "(startline " + startLineNumber + ")"
+          + "eof reached before encapsulated token finished"
+        );
+      } else {
+        // consume character
+        tkn.content.append((char) c);
+      }
+    }
+  }
+
+
+  /**
+   * Decodes Unicode escapes.
+   *
+   * Interpretation of "\\uXXXX" escape sequences
+   * where XXXX is a hex-number.
+   * @param c current char which is discarded because it's the "\\" of "\\uXXXX"
+   * @return the decoded character
+   * @throws IOException on wrong unicode escape sequence or read error
+   */
+  protected int unicodeEscapeLexer(int c) throws IOException {
+    int ret = 0;
+    // ignore 'u' (assume c==\ now) and read 4 hex digits
+    c = in.read();
+    code.clear();
+    try {
+      for (int i = 0; i < 4; i++) {
+        c  = in.read();
+        if (isEndOfFile(c) || isEndOfLine(c)) {
+          throw new NumberFormatException("number too short");
+        }
+        code.append((char) c);
+      }
+      ret = Integer.parseInt(code.toString(), 16);
+    } catch (NumberFormatException e) {
+      throw new IOException(
+        "(line " + getLineNumber() + ") Wrong unicode escape sequence found '"
+        + code.toString() + "'" + e.toString());
+    }
+    return ret;
+  }
+
+  private int readEscape(int c) throws IOException {
+    // assume c is the escape char (normally a backslash); the char after it picks the result
+    c = in.read();
+    int out;
+    switch (c) {
+      case 'r': out='\r'; break;
+      case 'n': out='\n'; break;
+      case 't': out='\t'; break;
+      case 'b': out='\b'; break;
+      case 'f': out='\f'; break;
+      default : out=c;
+    }
+    return out;
+  }
+
+  // ======================================================
+  // strategies
+  // ======================================================
+
+  /**
+   * Obtain the specified CSV Strategy. This should not be modified.
+   *
+   * @return strategy currently being used
+   */
+  public CSVStrategy getStrategy() {
+    return this.strategy;
+  }
+
+  // ======================================================
+  // Character class checker
+  // ======================================================
+
+  /**
+   * @return true if the given char is a whitespace character other than the delimiter
+   */
+  private boolean isWhitespace(int c) {
+    return Character.isWhitespace((char) c) && (c != strategy.getDelimiter());
+  }
+
+  /**
+   * Greedy - accepts \n and \r\n
+   * This checker consumes silently the second control-character...
+   *
+   * @return true if the given character is a line-terminator
+   */
+  private boolean isEndOfLine(int c) throws IOException {
+    // check if we have \r\n...
+    if (c == '\r') {
+      if (in.lookAhead() == '\n') {
+        // note: does not change c outside of this method !!
+        c = in.read();
+      }
+    }
+    return (c == '\n');
+  }
+
+  /**
+   * @return true if the given character indicates end of file
+   */
+  private boolean isEndOfFile(int c) {
+    return c == ExtendedBufferedReader.END_OF_STREAM;
+  }
+}
Index: solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java
===================================================================
--- solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java (revision 0)
+++ solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java (revision 0)
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.internal.csv;
+
+import java.io.StringWriter;
+import java.io.StringReader;
+import java.io.IOException;
+
+/**
+ * Utility methods for dealing with CSV files
+ */
+public class CSVUtils {
+
+    private static final String[] EMPTY_STRING_ARRAY = new String[0];
+    private static final String[][] EMPTY_DOUBLE_STRING_ARRAY = new String[0][0];
+
+    /**
+     *

+     * <p>CSVUtils instances should NOT be constructed in
+     * standard programming.</p>
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>

+     */
+    public CSVUtils() {
+    }
+
+    /**
+     * Converts an array of string values into a single CSV line. All
+     * null values are converted to the string "null", all strings equal
+     * to "null" will additionally get quotes around. The substitution is
+     * done on a copy, so the passed array is left unmodified.
+     *
+     * @param values the value array
+     * @param strategy the CSVStrategy handed to the CSVPrinter that formats the line
+     * @return the CSV string (trimmed), an empty string if the value array has length 0
+     */
+    public static String printLine(String[] values, CSVStrategy strategy) {
+        values = values.clone(); // the null-substitution below must not alter the caller's array
+        StringWriter stringWriter = new StringWriter();
+        CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy);
+
+        // check for null values and "null" as strings and convert them
+        // into the strings "null" and "\"null\""
+        for (int i = 0; i < values.length; i++) {
+            if (values[i] == null) {
+                values[i] = "null";
+            } else if (values[i].equals("null")) {
+                values[i] = "\"null\"";
+            }
+        }
+
+        // convert to CSV
+        try {
+            csvPrinter.println(values);
+        } catch (IOException e) {
+            // should not happen with StringWriter
+        }
+        // as the resulting string has \r\n at the end, we will trim that away
+        return stringWriter.toString().trim();
+    }
+
+    // ======================================================
+    //  static parsers
+    // ======================================================
+
+    /**
+     * Parses the given String according to the default {@link CSVStrategy}.
+     *
+     * @param s CSV String to be parsed.
+     * @return parsed String matrix (which is never null)
+     * @throws IOException in case of error
+     */
+    public static String[][] parse(String s) throws IOException {
+        if (s == null) {
+            throw new IllegalArgumentException("Null argument not allowed.");
+        }
+        String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
+        if (result == null) {
+            // since CSVStrategy ignores empty lines an empty array is returned
+            // (i.e. not "result = new String[][] {{""}};")
+            result = EMPTY_DOUBLE_STRING_ARRAY;
+        }
+        return result;
+    }
+
+    /**
+     * Parses the first line only according to the default {@link CSVStrategy}.
+ * + * Parsing empty string will be handled as valid records containing zero + * elements, so the following property holds: parseLine("").length == 0. + * + * @param s CSV String to be parsed. + * @return parsed String vector (which is never null) + * @throws IOException in case of error + */ + public static String[] parseLine(String s) throws IOException { + if (s == null) { + throw new IllegalArgumentException("Null argument not allowed."); + } + // uh,jh: make sure that parseLine("").length == 0 + if (s.length() == 0) { + return EMPTY_STRING_ARRAY; + } + return (new CSVParser(new StringReader(s))).getLine(); + } + +} Index: solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/csv/writer/CSVConfigGuesser.java (revision 0) @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.solr.internal.csv.writer; + +import java.io.BufferedReader; +import java.io.InputStream; +import java.io.InputStreamReader; + +/** + * Tries to guess a config based on an InputStream. + * + * @author Martin van den Bemt + * @version $Id: $ + */ +public class CSVConfigGuesser { + + /** The stream to read */ + private InputStream in; + /** + * if the file has a field header (need this info, to be able to guess better) + * Defaults to false + */ + private boolean hasFieldHeader = false; + /** The found config */ + protected CSVConfig config; + + /** + * + */ + public CSVConfigGuesser() { + this.config = new CSVConfig(); + } + + /** + * @param in the inputstream to guess from + */ + public CSVConfigGuesser(InputStream in) { + this(); + setInputStream(in); + } + + public void setInputStream(InputStream in) { + this.in = in; + } + + /** + * Allow override. + * @return the inputstream that was set. + */ + protected InputStream getInputStream() { + return in; + } + + /** + * Guess the config based on the first 10 (or less when less available) + * records of a CSV file. + * + * @return the guessed config. + */ + public CSVConfig guess() { + try { + // tralalal + BufferedReader bIn = new BufferedReader(new InputStreamReader((getInputStream()))); + String[] lines = new String[10]; + String line = null; + int counter = 0; + while ( (line = bIn.readLine()) != null && counter <= 10) { + lines[counter] = line; + counter++; + } + if (counter < 10) { + // remove nulls from the array, so we can skip the null checking. + String[] newLines = new String[counter]; + System.arraycopy(lines, 0, newLines, 0, counter); + lines = newLines; + } + analyseLines(lines); + } catch(Exception e) { + e.printStackTrace(); + } finally { + if (in != null) { + try { + in.close(); + } catch(Exception e) { + // ignore exception. + } + } + } + CSVConfig conf = config; + // cleanup the config. 
+ config = null; + return conf; + } + + protected void analyseLines(String[] lines) { + guessFixedWidth(lines); + guessFieldSeperator(lines); + } + + /** + * Guess if this file is fixedwidth. + * Just basing the fact on all lines being of the same length + * @param lines + */ + protected void guessFixedWidth(String[] lines) { + int lastLength = 0; + // assume fixedlength. + config.setFixedWidth(true); + for (int i = 0; i < lines.length; i++) { + if (i == 0) { + lastLength = lines[i].length(); + } else { + if (lastLength != lines[i].length()) { + config.setFixedWidth(false); + } + } + } + } + + + protected void guessFieldSeperator(String[] lines) { + if (config.isFixedWidth()) { + guessFixedWidthSeperator(lines); + return; + } + for (int i = 0; i < lines.length; i++) { + } + } + + protected void guessFixedWidthSeperator(String[] lines) { + // keep track of the fieldlength + int previousMatch = -1; + for (int i = 0; i < lines[0].length(); i++) { + char last = ' '; + boolean charMatches = true; + for (int j = 0; j < lines.length; j++) { + if (j == 0) { + last = lines[j].charAt(i); + } + if (last != lines[j].charAt(i)) { + charMatches = false; + break; + } + } + if (charMatches) { + if (previousMatch == -1) { + previousMatch = 0; + } + CSVField field = new CSVField(); + field.setName("field"+config.getFields().length+1); + field.setSize((i-previousMatch)); + config.addField(field); + } + } + } + /** + * + * @return if the field uses a field header. Defaults to false. 
+ */ + public boolean hasFieldHeader() { + return hasFieldHeader; + } + + /** + * Specify if the CSV file has a field header + * @param hasFieldHeader true or false + */ + public void setHasFieldHeader(boolean hasFieldHeader) { + this.hasFieldHeader = hasFieldHeader; + } + + +} Index: solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/csv/writer/CSVField.java (revision 0) @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Describes one column of a CSV record: its name, its width (used for
 * fixed-width output) and an optional per-field fill pattern.
 *
 * @author Martin van den Bemt
 * @version $Id: $
 */
public class CSVField {

    /** Column name. */
    private String name;
    /** Column width. */
    private int size;
    /** Per-field fill pattern; only meaningful once {@link #overrideFill()} is true. */
    private int fill;
    /** Becomes true as soon as {@link #setFill(int)} has been called. */
    private boolean overrideFill;

    /**
     * Creates a field without a name and with size 0.
     */
    public CSVField() {
    }

    /**
     * Creates a named field with size 0.
     *
     * @param name the name of the field
     */
    public CSVField(String name) {
        setName(name);
    }

    /**
     * Creates a named field of the given size.
     *
     * @param name the name of the field
     * @param size the size of the field
     */
    public CSVField(String name, int size) {
        setName(name);
        setSize(size);
    }

    /**
     * Set the name of the field
     *
     * @param name the name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the name of the field
     */
    public String getName() {
        return this.name;
    }

    /**
     * Set the size of the field.
     * The size will be ignored when fixedwidth is set to false in the CSVConfig
     *
     * @param size the size of the field.
     */
    public void setSize(int size) {
        this.size = size;
    }

    /**
     * @return the size of the field
     */
    public int getSize() {
        return this.size;
    }

    /**
     * Sets the fill pattern for this field and marks the field as
     * overriding the fill pattern (see {@link #overrideFill()}).
     *
     * @param fill the fill pattern
     */
    public void setFill(int fill) {
        this.fill = fill;
        this.overrideFill = true;
    }

    /**
     * @return the fill pattern.
     */
    public int getFill() {
        return this.fill;
    }

    /**
     * Does this field override fill ?
     *
     * @return true once {@link #setFill(int)} has been called on this field
     */
    public boolean overrideFill() {
        return this.overrideFill;
    }

}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.solr.internal.csv.writer; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +/** + * The CSVConfig is used to configure the CSV writer + * + * @author Martin van den Bemt + * @version $Id: $ + */ +public class CSVConfig { + + /** specifies if it is a fixed width csv file **/ + private boolean fixedWidth; + /** list of fields **/ + private List fields; + + /** Do no do any filling **/ + public static final int FILLNONE = 0; + /** Fill content the the left. Mainly usable together with fixedWidth **/ + public static final int FILLLEFT = 1; + /** Fill content to the right. Mainly usable together with fixedWidth **/ + public static final int FILLRIGHT = 2; + + /** The fill pattern */ + private int fill; + /** The fill char. Defaults to a space */ + private char fillChar = ' '; + /** The seperator character. Defaults to , */ + private char delimiter = ','; + /** Should we ignore the delimiter. Defaults to false */ + private boolean ignoreDelimiter = false; + /** the value delimiter. Defaults to " */ + private char valueDelimiter = '"'; + /** Should we ignore the value delimiter. 
Defaults to true */ + private boolean ignoreValueDelimiter = true; + /** Specifies if we want to use a field header */ + private boolean fieldHeader = false; + /** Specifies if the end of the line needs to be trimmed */ + private boolean endTrimmed = false; + /** + * + */ + public CSVConfig() { + super(); + } + + /** + * @return if the CSV file is fixedWidth + */ + public boolean isFixedWidth() { + return fixedWidth; + } + + /** + * Specify if the CSV file is fixed width. + * Defaults to false + * @param fixedWidth the fixedwidth + */ + public void setFixedWidth(boolean fixedWidth) { + this.fixedWidth = fixedWidth; + } + + public void addField(CSVField field) { + if (fields == null) { + fields = new ArrayList(); + } + fields.add(field); + } + + /** + * Set the fields that should be used by the writer. + * This will overwrite currently added fields completely! + * @param csvFields the csvfields array. If null it will do nothing + */ + public void setFields(CSVField[] csvFields) { + if (csvFields == null) { + return; + } + fields = new ArrayList(Arrays.asList(csvFields)); + } + + /** + * Set the fields that should be used by the writer + * @param csvField a collection with fields. If null it will do nothing + */ + public void setFields(Collection csvField) { + if (csvField == null) { + return; + } + fields = new ArrayList(csvField); + } + + /** + * @return an array with the known fields (even if no fields are specified) + */ + public CSVField[] getFields() { + CSVField[] csvFields = new CSVField[0]; + if (fields != null) { + return (CSVField[]) fields.toArray(csvFields); + } + return csvFields; + } + + public CSVField getField(String name) { + if (fields == null || name == null) { + return null; + } + for(int i = 0; i < fields.size(); i++) { + CSVField field = (CSVField) fields.get(i); + if (name.equals(field.getName())) { + return field; + } + } + return null; + } + + /** + * @return the fill pattern. 
+ */ + public int getFill() { + return fill; + } + + /** + * Set the fill pattern. Defaults to {@link #FILLNONE} + *
Other options are : {@link #FILLLEFT} and {@link #FILLRIGHT} + * @param fill the fill pattern. + */ + public void setFill(int fill) { + this.fill = fill; + } + + /** + * + * @return the fillchar. Defaults to a space. + */ + public char getFillChar() { + return fillChar; + } + + /** + * Set the fill char + * @param fillChar the fill char + */ + public void setFillChar(char fillChar) { + this.fillChar = fillChar; + } + + /** + * @return the delimeter used. + */ + public char getDelimiter() { + return delimiter; + } + + /** + * Set the delimiter to use + * @param delimiter the delimiter character. + */ + public void setDelimiter(char delimiter) { + this.delimiter = delimiter; + } + + /** + * @return if the writer should ignore the delimiter character. + */ + public boolean isDelimiterIgnored() { + return ignoreDelimiter; + } + + /** + * Specify if the writer should ignore the delimiter. + * @param ignoreDelimiter defaults to false. + */ + public void setIgnoreDelimiter(boolean ignoreDelimiter) { + this.ignoreDelimiter = ignoreDelimiter; + } + + /** + * @return the value delimeter used. Defaults to " + */ + public char getValueDelimiter() { + return valueDelimiter; + } + + /** + * Set the value delimiter to use + * @param valueDelimiter the value delimiter character. + */ + public void setValueDelimiter(char valueDelimiter) { + this.valueDelimiter = valueDelimiter; + } + + /** + * @return if the writer should ignore the value delimiter character. + * Defaults to true. + */ + public boolean isValueDelimiterIgnored() { + return ignoreValueDelimiter; + } + + /** + * Specify if the writer should ignore the value delimiter. + * @param ignoreValueDelimiter defaults to false. + */ + public void setIgnoreValueDelimiter(boolean ignoreValueDelimiter) { + this.ignoreValueDelimiter = ignoreValueDelimiter; + } + + /** + * @return if a field header is used. 
Defaults to false + */ + public boolean isFieldHeader() { + return fieldHeader; + } + /** + * Specify if you want to use a field header. + * @param fieldHeader true or false. + */ + public void setFieldHeader(boolean fieldHeader) { + this.fieldHeader = fieldHeader; + } + + /** + * TODO.. + * @see java.lang.Object#equals(java.lang.Object) + */ + public boolean equals(Object obj) { + if (obj == null && !(obj instanceof CSVConfig)) { + return false; + } + return super.equals(obj); +// CSVConfig config = (CSVConfig) obj; +// getFill() == config.getFill() +// getFields().equals(config.getFields()) + } + + /** + * Creates a config based on a stream. It tries to guess
+ * NOTE : The stream will be closed. + * @param inputStream the inputstream. + * @return the guessed config. + */ + public static CSVConfig guessConfig(InputStream inputStream) { + return null; + } + + /** + * @return if the end of the line should be trimmed. Default is false. + */ + public boolean isEndTrimmed() { + return endTrimmed; + } + + /** + * Specify if the end of the line needs to be trimmed. Defaults to false. + * @param endTrimmed + */ + public void setEndTrimmed(boolean endTrimmed) { + this.endTrimmed = endTrimmed; + } + + +} Index: solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/csv/writer/CSVWriter.java (revision 0) @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.solr.internal.csv.writer; + +import java.io.Writer; +import java.util.Arrays; +import java.util.Map; + + +/** + * CSVWriter + * + * @author Martin van den Bemt + * @version $Id: $ + */ +public class CSVWriter { + + /** The CSV config **/ + private CSVConfig config; + /** The writer **/ + private Writer writer; + /** + * + */ + public CSVWriter() { + } + + public CSVWriter(CSVConfig config) { + setConfig(config); + } + + public void writeRecord(Map map) { + CSVField[] fields = config.getFields(); + try { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < fields.length; i++) { + Object o = map.get(fields[i].getName()); + if (o != null) { + String value = o.toString(); + value = writeValue(fields[i], value); + sb.append(value); + } + if (!config.isDelimiterIgnored() && fields.length != (i+1)) { + sb.append(config.getDelimiter()); + } + } + if (config.isEndTrimmed()) { + for (int i = sb.length()-1; i >= 0; i--) { + System.out.println("i : " + i); + if (Character.isWhitespace(sb.charAt(i))) { + sb.deleteCharAt(i); + } else { + break; + } + } + } + sb.append("\n"); + String line = sb.toString(); + writer.write(line); + } catch(Exception e) { + e.printStackTrace(); + } + } + + protected String writeValue(CSVField field, String value) throws Exception { + if (config.isFixedWidth()) { + if (value.length() < field.getSize()) { + int fillPattern = config.getFill(); + if (field.overrideFill()) { + fillPattern = field.getFill(); + } + StringBuffer sb = new StringBuffer(); + int fillSize = (field.getSize() - value.length()); + char[] fill = new char[fillSize]; + Arrays.fill(fill, config.getFillChar()); + if (fillPattern == CSVConfig.FILLLEFT) { + sb.append(fill); + sb.append(value); + value = sb.toString(); + } else { + // defaults to fillpattern FILLRIGHT when fixedwidth is used + sb.append(value); + sb.append(fill); + value = sb.toString(); + } + } else if (value.length() > field.getSize()) { + // value to big.. 
+ value = value.substring(0, field.getSize()); + } + if (!config.isValueDelimiterIgnored()) { + // add the value delimiter.. + value = config.getValueDelimiter()+value+config.getValueDelimiter(); + } + } + return value; + } + /** + * @return the CVSConfig or null if not present + */ + public CSVConfig getConfig() { + return config; + } + + /** + * Set the CSVConfig + * @param config the CVSConfig + */ + public void setConfig(CSVConfig config) { + this.config = config; + } + + /** + * Set the writer to write the CSV file to. + * @param writer the writer. + */ + public void setWriter(Writer writer) { + this.writer = writer; + } + +} Index: solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java =================================================================== --- solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java (revision 0) +++ solr/core/src/java/org/apache/solr/internal/csv/CSVStrategy.java (revision 0) @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.internal.csv; + +import java.io.Serializable; + +/** + * CSVStrategy + * + * Represents the strategy for a CSV. 
/**
 * CSVStrategy
 *
 * Bundles the settings that describe one CSV dialect: delimiter,
 * encapsulator, comment and escape characters, whitespace and empty-line
 * handling, and the newline used when printing.
 */
public class CSVStrategy implements Cloneable, Serializable {

    // (char) -2 is U+FFFE. It marks a feature as disabled: it differs from
    // the EOF signal (-1), and U+FFFE is a Unicode noncharacter, so it should
    // never collide with a real text char.
    // These must stay above the predefined strategies, which read them
    // during static initialisation.
    public static char COMMENTS_DISABLED = (char) -2;
    public static char ESCAPE_DISABLED = (char) -2;
    public static char ENCAPSULATOR_DISABLED = (char) -2;

    /** Comma separated, double-quote encapsulated; trims whitespace and skips empty lines. */
    public static CSVStrategy DEFAULT_STRATEGY =
        new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
    /** Comma separated, double-quote encapsulated; keeps whitespace and empty lines. */
    public static CSVStrategy EXCEL_STRATEGY =
        new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);
    /** Tab separated, double-quote encapsulated; trims whitespace and skips empty lines. */
    public static CSVStrategy TDF_STRATEGY =
        new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);

    private char delimiter;
    private char encapsulator;
    private char commentStart;
    private char escape;
    private boolean ignoreLeadingWhitespaces;
    private boolean ignoreTrailingWhitespaces;
    private boolean interpretUnicodeEscapes;
    private boolean ignoreEmptyLines;

    // controls for output
    private String printerNewline = "\n";

    /**
     * Creates a strategy with escaping disabled that ignores leading and
     * trailing whitespace, does not interpret unicode escapes and skips
     * empty lines.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     */
    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
        this(delimiter, encapsulator, commentStart, ESCAPE_DISABLED, true, true, false, true);
    }

    /**
     * Customized CSV strategy setter.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     * @param escape a Char used for escaping
     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
     *                                ignored
     * @param ignoreTrailingWhitespace TRUE when trailing whitespaces should be
     *                                 ignored
     * @param interpretUnicodeEscapes TRUE when unicode escapes should be
     *                                interpreted
     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
     */
    public CSVStrategy(
        char delimiter,
        char encapsulator,
        char commentStart,
        char escape,
        boolean ignoreLeadingWhitespace,
        boolean ignoreTrailingWhitespace,
        boolean interpretUnicodeEscapes,
        boolean ignoreEmptyLines)
    {
        setDelimiter(delimiter);
        setEncapsulator(encapsulator);
        setCommentStart(commentStart);
        setEscape(escape);
        setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
        setIgnoreTrailingWhitespaces(ignoreTrailingWhitespace);
        setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
        setIgnoreEmptyLines(ignoreEmptyLines);
    }

    /**
     * Same as the full constructor with escaping disabled and trailing
     * whitespace ignored.
     *
     * @deprecated
     */
    public CSVStrategy(
        char delimiter,
        char encapsulator,
        char commentStart,
        boolean ignoreLeadingWhitespace,
        boolean interpretUnicodeEscapes,
        boolean ignoreEmptyLines)
    {
        this(delimiter, encapsulator, commentStart, ESCAPE_DISABLED,
             ignoreLeadingWhitespace, true, interpretUnicodeEscapes, ignoreEmptyLines);
    }

    public char getDelimiter() {
        return delimiter;
    }

    public void setDelimiter(char delimiter) {
        this.delimiter = delimiter;
    }

    public char getEncapsulator() {
        return encapsulator;
    }

    public void setEncapsulator(char encapsulator) {
        this.encapsulator = encapsulator;
    }

    public char getCommentStart() {
        return commentStart;
    }

    public void setCommentStart(char commentStart) {
        this.commentStart = commentStart;
    }

    /** @return true when the comment start char equals {@link #COMMENTS_DISABLED} */
    public boolean isCommentingDisabled() {
        return commentStart == COMMENTS_DISABLED;
    }

    public char getEscape() {
        return escape;
    }

    public void setEscape(char escape) {
        this.escape = escape;
    }

    public boolean getIgnoreLeadingWhitespaces() {
        return ignoreLeadingWhitespaces;
    }

    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) {
        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
    }

    public boolean getIgnoreTrailingWhitespaces() {
        return ignoreTrailingWhitespaces;
    }

    public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) {
        this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
    }

    public boolean getUnicodeEscapeInterpretation() {
        return interpretUnicodeEscapes;
    }

    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) {
        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
    }

    public boolean getIgnoreEmptyLines() {
        return ignoreEmptyLines;
    }

    public void setIgnoreEmptyLines(boolean ignoreEmptyLines) {
        this.ignoreEmptyLines = ignoreEmptyLines;
    }

    public String getPrinterNewline() {
        return printerNewline;
    }

    public void setPrinterNewline(String newline) {
        this.printerNewline = newline;
    }

    /**
     * @return a field-by-field copy of this strategy
     */
    public Object clone() {
        try {
            return super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);  // impossible
        }
    }
}
org.apache.solr.common.SolrInputField; Index: solr/lib/apache-solr-noggit-pom.xml.template =================================================================== --- solr/lib/apache-solr-noggit-pom.xml.template (revision 1306785) +++ solr/lib/apache-solr-noggit-pom.xml.template (working copy) @@ -1,36 +0,0 @@ - - - - - - org.apache.solr - solr-parent - @version@ - - 4.0.0 - org.apache.solr - solr-noggit - Solr Specific Noggit - @version@ - Solr Specific Noggit r1211150 - jar - Index: solr/lib/apache-solr-noggit-r1211150.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: solr/lib/apache-solr-commons-csv-1.0-SNAPSHOT-r966014.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: solr/lib/apache-solr-noggit-LICENSE-ASL.txt =================================================================== --- solr/lib/apache-solr-noggit-LICENSE-ASL.txt (revision 1306785) +++ solr/lib/apache-solr-noggit-LICENSE-ASL.txt (working copy) @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. Index: solr/lib/apache-solr-commons-csv-pom.xml.template =================================================================== --- solr/lib/apache-solr-commons-csv-pom.xml.template (revision 1306785) +++ solr/lib/apache-solr-commons-csv-pom.xml.template (working copy) @@ -1,36 +0,0 @@ - - - - - - org.apache.solr - solr-parent - @version@ - - 4.0.0 - org.apache.solr - solr-commons-csv - Solr Specific Commons CSV - @version@ - Solr Specific Commons CSV v1.0-SNAPSHOT-r966014 - jar - Index: solr/lib/apache-solr-noggit-NOTICE.txt =================================================================== --- solr/lib/apache-solr-noggit-NOTICE.txt (revision 1306785) +++ solr/lib/apache-solr-noggit-NOTICE.txt (working copy) @@ -1,2 +0,0 @@ -This product includes software developed by -The Apache Software Foundation (http://www.apache.org/). Index: solr/lib/apache-solr-commons-csv-NOTICE.txt =================================================================== --- solr/lib/apache-solr-commons-csv-NOTICE.txt (revision 1306785) +++ solr/lib/apache-solr-commons-csv-NOTICE.txt (working copy) @@ -1,9 +0,0 @@ -Apache Commons CSV -Copyright 2005-2006 The Apache Software Foundation - -This product includes software developed by -The Apache Software Foundation (http://www.apache.org/). - -This JAR artifact contains a pre-release version of -Apache Commons CSV, that was jarjar'ed to have a Solr -private package name. 
Don't use it outside of Solr. Index: solr/lib/apache-solr-commons-csv-LICENSE-ASL.txt =================================================================== --- solr/lib/apache-solr-commons-csv-LICENSE-ASL.txt (revision 1306785) +++ solr/lib/apache-solr-commons-csv-LICENSE-ASL.txt (working copy) @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.