Index: C:/Workspace/mime4j/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java =================================================================== --- C:/Workspace/mime4j/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java (revision 554413) +++ C:/Workspace/mime4j/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java (working copy) @@ -438,7 +438,6 @@ try { String expected = IOUtils.toString(new FileInputStream(xmlFile), "ISO8859-1"); - assertEquals("Error parsing " + f.getName(), expected, result); } catch (FileNotFoundException e) { FileOutputStream fos = new FileOutputStream(xmlFileMime4j); Index: C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeTokenStream.java =================================================================== --- C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeTokenStream.java (revision 0) +++ C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeTokenStream.java (revision 0) @@ -0,0 +1,538 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. 
* + ****************************************************************/ + +package org.apache.james.mime4j; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.james.mime4j.decoder.Base64InputStream; +import org.apache.james.mime4j.decoder.QuotedPrintableInputStream; + + +/** + *

+ * Parses MIME (or RFC822) message streams of bytes or characters. + * The stream is converted into an event stream. + *

+ *

+ * Typical usage: + *

+ *
+ *      MimeTokenStream stream = new MimeTokenStream();
+ *      stream.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
+ *      for (int state = stream.getState();
+ *           state != MimeTokenStream.T_END_OF_STREAM;
+ *           state = stream.next()) {
+ *          switch (state) {
+ *            case MimeTokenStream.T_BODY:
+ *              System.out.println("Body detected, contents = "
+ *                + stream.getInputStream() + ", header data = "
+ *                + stream.getBodyDescriptor());
+ *              break;
+ *            case MimeTokenStream.T_FIELD:
+ *              System.out.println("Header field detected: "
+ *                + stream.getField());
+ *              break;
+ *            case MimeTokenStream.T_START_MULTIPART:
+ *              System.out.println("Multipart message detected,"
+ *                + " header data = "
+ *                + stream.getBodyDescriptor());
+ *            ...
+ *          }
+ *      }
+ * 
+ *

+ * NOTE: All lines must end with CRLF + * (\r\n). If you are unsure of the line endings in your stream + * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} + * instance.

+ *

Instances of {@link MimeTokenStream} are reusable: Invoking the + * method {@link #parse(InputStream)} resets the token stream's internal + * state. However, they are definitely not thread-safe. If you + * have a multi-threaded application, then the suggested use is to have + * one instance per thread.

+ * + * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $ + */ +public class MimeTokenStream { + private static final Log log = LogFactory.getLog(MimeStreamParser.class); + + /** + * This token indicates, that the MIME stream has been completely + * and successfully parsed, and no more data is available. + */ + public static final int T_END_OF_STREAM = -1; + /** + * This token indicates, that the MIME stream is currently + * at the beginning of a message. + */ + public static final int T_START_MESSAGE = 0; + /** + * This token indicates, that the MIME stream is currently + * at the end of a message. + */ + public static final int T_END_MESSAGE = 1; + /** + * This token indicates, that a raw entity is currently being processed. + * You may call {@link #getInputStream()} to obtain the raw entity + * data. + */ + public static final int T_RAW_ENTITY = 2; + /** + * This token indicates, that a message parts headers are now + * being parsed. + */ + public static final int T_START_HEADER = 3; + /** + * This token indicates, that a message parts field has now + * been parsed. You may call {@link #getField()} to obtain the + * raw field contents. + */ + public static final int T_FIELD = 4; + /** + * This token indicates, that part headers have now been + * parsed. + */ + public static final int T_END_HEADER = 5; + /** + * This token indicates, that a multipart body is being parsed. + */ + public static final int T_START_MULTIPART = 6; + /** + * This token indicates, that a multipart body has been parsed. + */ + public static final int T_END_MULTIPART = 7; + /** + * This token indicates, that a multiparts preamble is being + * parsed. You may call {@link #getInputStream()} to access the + * preamble contents. + */ + public static final int T_PREAMBLE = 8; + /** + * This token indicates, that a multiparts epilogue is being + * parsed. You may call {@link #getInputStream()} to access the + * epilogue contents. 
+ */ + public static final int T_EPILOGUE = 9; + /** + * This token indicates, that the MIME stream is currently + * at the beginning of a body part. + */ + public static final int T_START_BODYPART = 10; + /** + * This token indicates, that the MIME stream is currently + * at the end of a body part. + */ + public static final int T_END_BODYPART = 11; + /** + * This token indicates, that an atomic entity is being parsed. + * Use {@link #getInputStream()} to access the entity contents. + */ + public static final int T_BODY = 12; + /** + * Internal state, not exposed. + */ + private static final int T_IN_BODYPART = -2; + /** + * Internal state, not exposed. + */ + private static final int T_IN_MESSAGE = -3; + + private static final BitSet fieldChars = new BitSet(); + static { + for (int i = 0x21; i <= 0x39; i++) { + fieldChars.set(i); + } + for (int i = 0x3b; i <= 0x7e; i++) { + fieldChars.set(i); + } + } + + abstract static class StateMachine { + int state; + abstract int next() throws IOException, MimeException; + } + + private static class RawEntity extends StateMachine { + private InputStream stream; + RawEntity(InputStream stream) { + this.stream = stream; + state = T_RAW_ENTITY; + } + int next() { + state = T_END_OF_STREAM; + return state; + } + } + + private abstract class Entity extends StateMachine { + private final BodyDescriptor parent; + private final InputStream contents; + private final StringBuffer sb = new StringBuffer(); + private BodyDescriptor body; + private int pos, start; + private int lineNumber, startLineNumber; + private final int endState; + private MimeBoundaryInputStream mbis; + InputStream stream; + String field; + + Entity(InputStream contents, BodyDescriptor parent, int startState, int endState) { + this.parent = parent; + this.contents = contents; + state = startState; + this.endState = endState; + } + + private void setParsingFieldState() { + state = parseField() ? 
T_FIELD : T_END_HEADER; + } + + private int setParseBodyPartState() throws IOException { + mbis.consume(); + if (mbis.parentEOF()) { + if (log.isWarnEnabled()) { + log.warn("Line " + rootInputStream.getLineNumber() + + ": Body part ended prematurely. " + + "Higher level boundary detected or " + + "EOF reached."); + } + } else { + if (mbis.hasMoreParts()) { + mbis = new MimeBoundaryInputStream(contents, body.getBoundary()); + if (isRaw()) { + currentStateMachine = new RawEntity(mbis); + } else { + currentStateMachine = new BodyPart(mbis, body); + } + entities.add(currentStateMachine); + state = T_IN_BODYPART; + return currentStateMachine.state; + } + } + state = T_EPILOGUE; + stream = new CloseShieldInputStream(contents); + return T_EPILOGUE; + } + + int next() throws IOException, MimeException { + switch (state) { + case T_START_MESSAGE: + case T_START_BODYPART: + state = T_START_HEADER; + break; + case T_START_HEADER: + initHeaderParsing(); + setParsingFieldState(); + break; + case T_FIELD: + setParsingFieldState(); + break; + case T_END_HEADER: + if (body.isMultipart()) { + state = T_START_MULTIPART; + } else if (body.isMessage()) { + InputStream is = contents; + if (body.isBase64Encoded()) { + log.warn("base64 encoded message/rfc822 detected"); + is = new EOLConvertingInputStream(new Base64InputStream(contents)); + } else if (body.isQuotedPrintableEncoded()) { + log.warn("quoted-printable encoded message/rfc822 detected"); + is = new EOLConvertingInputStream(new QuotedPrintableInputStream(contents)); + } + state = endState; + return parseMessage(is, body); + } else { + stream = new CloseShieldInputStream(contents); + state = T_BODY; + break; + } + break; + case T_START_MULTIPART: + mbis = new MimeBoundaryInputStream(contents, body.getBoundary()); + stream = new CloseShieldInputStream(mbis); + state = T_PREAMBLE; + break; + case T_PREAMBLE: + return setParseBodyPartState(); + case T_IN_BODYPART: + return setParseBodyPartState(); + case T_EPILOGUE: + state = 
T_END_MULTIPART; + break; + case T_BODY: + case T_END_MULTIPART: + case T_IN_MESSAGE: + state = endState; + break; + default: + if (state == endState) { + state = T_END_OF_STREAM; + break; + } + throw new IllegalStateException("Invalid state: " + state); + } + return state; + } + + private void initHeaderParsing() throws IOException { + body = new BodyDescriptor(parent); + startLineNumber = lineNumber = rootInputStream.getLineNumber(); + + int curr = 0; + int prev = 0; + while ((curr = contents.read()) != -1) { + if (curr == '\n' && (prev == '\n' || prev == 0)) { + /* + * [\r]\n[\r]\n or an immediate \r\n have been seen. + */ + sb.deleteCharAt(sb.length() - 1); + break; + } + sb.append((char) curr); + prev = curr == '\r' ? prev : curr; + } + + if (curr == -1 && log.isWarnEnabled()) { + log.warn("Line " + rootInputStream.getLineNumber() + + ": Unexpected end of headers detected. " + + "Boundary detected in header or EOF reached."); + } + } + + private boolean parseField() { + while (pos < sb.length()) { + while (pos < sb.length() && sb.charAt(pos) != '\r') { + pos++; + } + if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') { + pos++; + continue; + } + if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) { + /* + * field should be the complete field data excluding the + * trailing \r\n. + */ + field = sb.substring(start, pos); + start = pos + 2; + + /* + * Check for a valid field. 
+ */ + int index = field.indexOf(':'); + boolean valid = false; + if (index != -1 && fieldChars.get(field.charAt(0))) { + valid = true; + String fieldName = field.substring(0, index).trim(); + for (int i = 0; i < fieldName.length(); i++) { + if (!fieldChars.get(fieldName.charAt(i))) { + valid = false; + break; + } + } + if (valid) { + body.addField(fieldName, field.substring(index + 1)); + startLineNumber = lineNumber; + pos += 2; + lineNumber++; + return true; + } + } + if (log.isWarnEnabled()) { + log.warn("Line " + startLineNumber + + ": Ignoring invalid field: '" + field.trim() + "'"); + } + startLineNumber = lineNumber; + } + pos += 2; + lineNumber++; + } + return false; + } + } + + private class Message extends Entity { + Message(InputStream contents, BodyDescriptor parent) { + super(contents, parent, T_START_MESSAGE, T_END_MESSAGE); + } + } + + private class BodyPart extends Entity { + BodyPart(InputStream contents, BodyDescriptor parent) { + super(contents, parent, T_START_BODYPART, T_END_BODYPART); + } + } + + private int state = T_END_OF_STREAM; + private RootInputStream rootInputStream; + private StateMachine currentStateMachine; + private final List entities = new ArrayList(); + private boolean raw; + + /** Instructs the {@code MimeTokenStream} to parse the given streams contents. + * If the {@code MimeTokenStream} has already been in use, resets the streams + * internal state. + */ + public void parse(InputStream stream) { + entities.clear(); + rootInputStream = new RootInputStream(stream); + state = parseMessage(rootInputStream, null); + } + + private int parseMessage(InputStream pStream, BodyDescriptor parent) { + if (isRaw()) { + currentStateMachine = new RawEntity(pStream); + } else { + currentStateMachine = new Message(pStream, parent); + } + entities.add(currentStateMachine); + return currentStateMachine.state; + } + + /** + * Determines if this parser is currently in raw mode. + * + * @return true if in raw mode, false + * otherwise. 
+ * @see #setRaw(boolean) + */ + public boolean isRaw() { + return raw; + } + + /** + * Enables or disables raw mode. In raw mode all future entities + * (messages or body parts) in the stream will be reported to the + * {@link ContentHandler#raw(InputStream)} handler method only. + * The stream will contain the entire unparsed entity contents + * including header fields and whatever is in the body. + * + * @param raw true enables raw mode, false + * disables it. + */ + public void setRaw(boolean raw) { + this.raw = raw; + } + + /** + * Finishes the parsing and stops reading lines. + * NOTE: No more lines will be parsed but the parser + * will still call + * {@link ContentHandler#endMultipart()}, + * {@link ContentHandler#endBodyPart()}, + * {@link ContentHandler#endMessage()}, etc to match previous calls + * to + * {@link ContentHandler#startMultipart(BodyDescriptor)}, + * {@link ContentHandler#startBodyPart()}, + * {@link ContentHandler#startMessage()}, etc. + */ + public void stop() { + rootInputStream.truncate(); + } + + /** + * Returns the current state. + */ + public int getState() { + return state; + } + + /** + * This method is valid, if {@link #getState()} returns {@link #T_FIELD}. + * @return String with the fields raw contents. + * @throws IllegalStateException {@link #getState()} returns another + * value than {@link #T_FIELD}. + */ + public String getField() { + switch (getState()) { + case T_FIELD: + return ((Entity) currentStateMachine).field; + default: + throw new IllegalStateException("Expected state to be T_FIELD."); + } + } + + /** + * This method is valid, if {@link #getState()} returns either of + * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}. + * It returns the raw entity, preamble, or epilogue contents. + * @return Data stream, depending on the current state. + * @throws IllegalStateException {@link #getState()} returns an + * invalid value. 
+ */ + public InputStream getInputStream() { + switch (getState()) { + case T_RAW_ENTITY: + return ((RawEntity) currentStateMachine).stream; + case T_PREAMBLE: + case T_EPILOGUE: + case T_BODY: + return ((Entity) currentStateMachine).stream; + default: + throw new IllegalStateException("Expected state to be either of T_RAW_ENTITY, T_PREAMBLE, or T_EPILOGUE."); + } + } + + /** + * This method is valid, if {@link #getState()} returns + * {@link #T_BODY}, or {@link #T_START_MULTIPART}. It returns the current + * entities body descriptor. + */ + public BodyDescriptor getBodyDescriptor() { + switch (getState()) { + case T_BODY: + case T_START_MULTIPART: + return ((Entity) currentStateMachine).body; + default: + throw new IllegalStateException("Expected state to be T_BODY."); + } + } + + /** + * This method advances the token stream to the next token. + * @throws IllegalStateException The method has been called, although + * {@link #getState()} was already {@link #T_END_OF_STREAM}. + */ + public int next() throws IOException, MimeException { + if (state == T_END_OF_STREAM || currentStateMachine == null) { + throw new IllegalStateException("No more tokens are available."); + } + while (currentStateMachine != null) { + state = currentStateMachine.next(); + if (state != T_END_OF_STREAM) { + return state; + } + entities.remove(entities.size()-1); + if (entities.size() == 0) { + currentStateMachine = null; + } else { + currentStateMachine = (StateMachine) entities.get(entities.size()-1); + } + } + state = T_END_OF_STREAM; + return state; + } +} Index: C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeStreamParser.java =================================================================== --- C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeStreamParser.java (revision 554413) +++ C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeStreamParser.java (working copy) @@ -21,14 +21,7 @@ import java.io.IOException; import java.io.InputStream; -import 
java.util.BitSet; -import java.util.LinkedList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.james.mime4j.decoder.Base64InputStream; -import org.apache.james.mime4j.decoder.QuotedPrintableInputStream; - /** *

* Parses MIME (or RFC822) message streams of bytes or characters and reports @@ -50,30 +43,8 @@ * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $ */ public class MimeStreamParser { - private static final Log log = LogFactory.getLog(MimeStreamParser.class); - - private static BitSet fieldChars = null; - - private RootInputStream rootStream = null; - private LinkedList bodyDescriptors = new LinkedList(); private ContentHandler handler = null; - private boolean raw = false; - - static { - fieldChars = new BitSet(); - for (int i = 0x21; i <= 0x39; i++) { - fieldChars.set(i); - } - for (int i = 0x3b; i <= 0x7e; i++) { - fieldChars.set(i); - } - } - - /** - * Creates a new MimeStreamParser instance. - */ - public MimeStreamParser() { - } + private final MimeTokenStream mimeTokenStream = new MimeTokenStream(); /** * Parses a stream of bytes containing a MIME message. @@ -83,8 +54,56 @@ * @throws IOException on I/O errors. */ public void parse(InputStream is) throws MimeException, IOException { - rootStream = new RootInputStream(is); - parseMessage(rootStream); + mimeTokenStream.parse(is); + OUTER: for (;;) { + int state = mimeTokenStream.getState(); + switch (state) { + case MimeTokenStream.T_BODY: + handler.body(mimeTokenStream.getBodyDescriptor(), mimeTokenStream.getInputStream()); + break; + case MimeTokenStream.T_END_BODYPART: + handler.endBodyPart(); + break; + case MimeTokenStream.T_END_HEADER: + handler.endHeader(); + break; + case MimeTokenStream.T_END_MESSAGE: + handler.endMessage(); + break; + case MimeTokenStream.T_END_MULTIPART: + handler.endMultipart(); + break; + case MimeTokenStream.T_END_OF_STREAM: + break OUTER; + case MimeTokenStream.T_EPILOGUE: + handler.epilogue(mimeTokenStream.getInputStream()); + break; + case MimeTokenStream.T_FIELD: + handler.field(mimeTokenStream.getField()); + break; + case MimeTokenStream.T_PREAMBLE: + handler.preamble(mimeTokenStream.getInputStream()); + break; + case MimeTokenStream.T_RAW_ENTITY: 
+ handler.raw(mimeTokenStream.getInputStream()); + break; + case MimeTokenStream.T_START_BODYPART: + handler.startBodyPart(); + break; + case MimeTokenStream.T_START_HEADER: + handler.startHeader(); + break; + case MimeTokenStream.T_START_MESSAGE: + handler.startMessage(); + break; + case MimeTokenStream.T_START_MULTIPART: + handler.startMultipart(mimeTokenStream.getBodyDescriptor()); + break; + default: + throw new IllegalStateException("Invalid state: " + state); + } + state = mimeTokenStream.next(); + } } /** @@ -95,7 +114,7 @@ * @see #setRaw(boolean) */ public boolean isRaw() { - return raw; + return mimeTokenStream.isRaw(); } /** @@ -109,7 +128,7 @@ * disables it. */ public void setRaw(boolean raw) { - this.raw = raw; + mimeTokenStream.setRaw(raw); } /** @@ -125,196 +144,10 @@ * {@link ContentHandler#startMessage()}, etc. */ public void stop() { - rootStream.truncate(); + mimeTokenStream.stop(); } /** - * Parses an entity which consists of a header followed by a body containing - * arbitrary data, body parts or an embedded message. - * - * @param is the stream to parse. - * @throws MimeException if the entity can not be processed - * @throws IOException on I/O errors. - */ - private void parseEntity(InputStream is) throws MimeException, IOException { - BodyDescriptor bd = parseHeader(is); - - if (bd.isMultipart()) { - bodyDescriptors.addFirst(bd); - - handler.startMultipart(bd); - - MimeBoundaryInputStream tempIs = - new MimeBoundaryInputStream(is, bd.getBoundary()); - handler.preamble(new CloseShieldInputStream(tempIs)); - tempIs.consume(); - - while (tempIs.hasMoreParts()) { - tempIs = new MimeBoundaryInputStream(is, bd.getBoundary()); - parseBodyPart(tempIs); - tempIs.consume(); - if (tempIs.parentEOF()) { - if (log.isWarnEnabled()) { - log.warn("Line " + rootStream.getLineNumber() - + ": Body part ended prematurely. 
" - + "Higher level boundary detected or " - + "EOF reached."); - } - break; - } - } - - handler.epilogue(new CloseShieldInputStream(is)); - - handler.endMultipart(); - - bodyDescriptors.removeFirst(); - - } else if (bd.isMessage()) { - if (bd.isBase64Encoded()) { - log.warn("base64 encoded message/rfc822 detected"); - is = new EOLConvertingInputStream( - new Base64InputStream(is)); - } else if (bd.isQuotedPrintableEncoded()) { - log.warn("quoted-printable encoded message/rfc822 detected"); - is = new EOLConvertingInputStream( - new QuotedPrintableInputStream(is)); - } - bodyDescriptors.addFirst(bd); - parseMessage(is); - bodyDescriptors.removeFirst(); - } else { - handler.body(bd, new CloseShieldInputStream(is)); - } - - /* - * Make sure the stream has been consumed. - */ - while (is.read() != -1) { - } - } - - private void parseMessage(InputStream is) - throws MimeException, IOException { - if (raw) { - handler.raw(new CloseShieldInputStream(is)); - } else { - handler.startMessage(); - parseEntity(is); - handler.endMessage(); - } - } - - private void parseBodyPart(InputStream is) - throws MimeException, IOException { - if (raw) { - handler.raw(new CloseShieldInputStream(is)); - } else { - handler.startBodyPart(); - parseEntity(is); - handler.endBodyPart(); - } - } - - /** - * Parses a header. - * - * @param is the stream to parse. - * @return a BodyDescriptor describing the body following - * the header. - * @throws MimeException if the header can not be processed - * @throws IOException on I/O errors - */ - private BodyDescriptor parseHeader(InputStream is) - throws MimeException, IOException { - BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty() - ? 
null : (BodyDescriptor) bodyDescriptors.getFirst()); - - handler.startHeader(); - - int lineNumber = rootStream.getLineNumber(); - - StringBuffer sb = new StringBuffer(); - int curr = 0; - int prev = 0; - while ((curr = is.read()) != -1) { - if (curr == '\n' && (prev == '\n' || prev == 0)) { - /* - * [\r]\n[\r]\n or an immediate \r\n have been seen. - */ - sb.deleteCharAt(sb.length() - 1); - break; - } - sb.append((char) curr); - prev = curr == '\r' ? prev : curr; - } - - if (curr == -1 && log.isWarnEnabled()) { - log.warn("Line " + rootStream.getLineNumber() - + ": Unexpected end of headers detected. " - + "Boundary detected in header or EOF reached."); - } - - int start = 0; - int pos = 0; - int startLineNumber = lineNumber; - while (pos < sb.length()) { - while (pos < sb.length() && sb.charAt(pos) != '\r') { - pos++; - } - if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') { - pos++; - continue; - } - - if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) { - - /* - * field should be the complete field data excluding the - * trailing \r\n. - */ - String field = sb.substring(start, pos); - start = pos + 2; - - /* - * Check for a valid field. - */ - int index = field.indexOf(':'); - boolean valid = false; - if (index != -1 && fieldChars.get(field.charAt(0))) { - valid = true; - String fieldName = field.substring(0, index).trim(); - for (int i = 0; i < fieldName.length(); i++) { - if (!fieldChars.get(fieldName.charAt(i))) { - valid = false; - break; - } - } - - if (valid) { - handler.field(field); - bd.addField(fieldName, field.substring(index + 1)); - } - } - - if (!valid && log.isWarnEnabled()) { - log.warn("Line " + startLineNumber - + ": Ignoring invalid field: '" + field.trim() + "'"); - } - - startLineNumber = lineNumber; - } - - pos += 2; - lineNumber++; - } - - handler.endHeader(); - - return bd; - } - - /** * Sets the ContentHandler to use when reporting * parsing events. 
* Index: C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java =================================================================== --- C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java (revision 554413) +++ C:/Workspace/mime4j/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java (working copy) @@ -36,11 +36,11 @@ */ public class MimeBoundaryInputStream extends InputStream { - private PushbackInputStream s = null; - private byte[] boundary = null; + private PushbackInputStream s; + private byte[] boundary; private boolean first = true; - private boolean eof = false; - private boolean parenteof = false; + private boolean eof; + private boolean parenteof; private boolean moreParts = true; /** Index: C:/Workspace/mime4j/src/site/site.xml =================================================================== --- C:/Workspace/mime4j/src/site/site.xml (revision 554413) +++ C:/Workspace/mime4j/src/site/site.xml (working copy) @@ -32,7 +32,8 @@

- + + Index: C:/Workspace/mime4j/src/site/apt/usage.apt =================================================================== --- C:/Workspace/mime4j/src/site/apt/usage.apt (revision 0) +++ C:/Workspace/mime4j/src/site/apt/usage.apt (revision 0) @@ -0,0 +1,191 @@ + +~~ Licensed to the Apache Software Foundation (ASF) under one +~~ or more contributor license agreements. See the NOTICE file +~~ distributed with this work for additional information +~~ regarding copyright ownership. The ASF licenses this file +~~ to you under the Apache License, Version 2.0 (the +~~ "License"); you may not use this file except in compliance +~~ with the License. You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, +~~ software distributed under the License is distributed on an +~~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +~~ KIND, either express or implied. See the License for the +~~ specific language governing permissions and limitations +~~ under the License. + + ------------- + Usage + ------------- + +{Usage} + + Mime4j provides two different API's: An event based API by using + the {{{apidocs/org/apache/james/mime4j/MimeStreamParser.html} + MimeStreamParser}}. Alternatively, you may use the iterative + API, which is available through the + {{{apidocs/org/apache/james/mime4j/MimeTokenStream.html} + MimeTokenStream}}. In terms of speed, you should not note + any differences. + + * {{{#Token Streams}Token Streams}} + + * {{{#Sample Token Stream}Sample Token Stream}} + + * {{{#Event Handlers}Event Handlers}} + + * {{{#Sample Event Stream}Sample Event Stream}} + +{Token Streams} + + The iterative approach is using the class + {{{apidocs/org/apache/james/mime4j/MimeTokenStream.html} + MimeTokenStream}}. 
Here's an example, how you could use + the token stream: + +-------------------------------------------------------------------- + MimeTokenStream stream = new MimeTokenStream(); + stream.parse(new BufferedInputStream(new FileInputStream("mime.msg"))); + for (int state = stream.getState(); + state != MimeTokenStream.T_END_OF_STREAM; + state = stream.next()) { + switch (state) { + case MimeTokenStream.T_BODY: + System.out.println("Body detected, contents = " + + stream.getInputStream() + ", header data = " + + stream.getBodyDescriptor()); + break; + case MimeTokenStream.T_FIELD: + System.out.println("Header field detected: " + + stream.getField()); + break; + case MimeTokenStream.T_START_MULTIPART: + System.out.println("Multipart message detected," + + " header data = " + + stream.getBodyDescriptor()); + ... + } + } +-------------------------------------------------------------------- + + The token stream provides a set of tokens. Tokens are identified + by a state. Most states are simply event indicators, with no + additional data available. However, there are some states, + which provide additional data. For example, the state + <<<T_BODY>>> indicates that an actual body is available. + If you encounter this state, then you may ask for the body's contents, + which are provided through the <<<getInputStream()>>> method, + or you might ask for the header data by invoking + <<<getBodyDescriptor()>>>. + +{Sample Token Stream} + + The following sample should give you a rough idea of the order, + in which you'll receive tokens: + +-------------------------------------------------------------------- + T_START_MESSAGE + T_START_HEADER + T_FIELD + T_FIELD + ... + T_END_HEADER + T_START_MULTIPART + T_PREAMBLE + T_START_BODYPART + T_START_HEADER + T_FIELD + T_FIELD + ... + T_END_HEADER + T_BODY + T_END_BODYPART + T_START_BODYPART + T_START_HEADER + T_FIELD + T_FIELD + ... 
+ T_END_HEADER + T_BODY + T_END_BODYPART + T_EPILOGUE + T_END_MULTIPART + T_END_MESSAGE +-------------------------------------------------------------------- + + The example shows a multipart message with two parts. + +{Event Handlers} + + The event based API requires, that you provide an event handler, + which receives events. The event handler is an object, which + implements the {{{apidocs/org/apache/james/mime4j/ContentHandler.html} + ContentHandler}} interface. Here's an example, how you could + implement an event handler: + +-------------------------------------------------------------------- + public class MyContentHandler implements org.apache.james.mime4j.ContentHandler { + public void body(BodyDescriptor bd, InputStream is) + throws MimeException, IOException { + System.out.println("Body detected, contents = " + + is + ", header data = " + bd); + } + public void field(String fieldData) throws MimeException { + System.out.println("Header field detected: " + + fieldData); + } + public void startMultipart(BodyDescriptor bd) throws MimeException { + System.out.println("Multipart message detected, header data = " + + bd); + } + ... 
+ } +-------------------------------------------------------------------- + + A little bit of additional code allows us to create an example, which + is functionally equivalent to the example from the section on + {{{#Token Streams}Token Streams}}: + +-------------------------------------------------------------------- + ContentHandler handler = new MyContentHandler(); + MimeStreamParser parser = new MimeStreamParser(); + parser.setContentHandler(handler); + parser.parse(new BufferedInputStream(new FileInputStream("mime.msg"))); +-------------------------------------------------------------------- + +{Sample Event Stream} + + Like above for tokens, we provide an additional example, which + demonstrates the typical order of events that you have to expect: + +-------------------------------------------------------------------- + startMessage() + startHeader() + field(...) + field(...) + ... + endHeader() + startMultipart() + preamble(...) + startBodyPart() + startHeader() + field(...) + field(...) + ... + endHeader() + body() + endBodyPart() + startBodyPart() + startHeader() + field(...) + field(...) + ... + endHeader() + body() + endBodyPart() + epilogue(...) + endMultipart() + endMessage() +-------------------------------------------------------------------- Index: C:/Workspace/mime4j/pom.xml =================================================================== --- C:/Workspace/mime4j/pom.xml (revision 554413) +++ C:/Workspace/mime4j/pom.xml (working copy) @@ -288,8 +288,7 @@ maven-javadoc-plugin - org.codehaus.mojo - jxr-maven-plugin + maven-jxr-plugin