Index: jackrabbit-text-extractors/pom.xml
===================================================================
--- jackrabbit-text-extractors/pom.xml (revision 645290)
+++ jackrabbit-text-extractors/pom.xml (working copy)
@@ -79,6 +79,11 @@
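       <artifactId>slf4j-api</artifactId>
     </dependency>
     <dependency>
+      <groupId>asm</groupId>
+      <artifactId>asm</artifactId>
+      <version>3.1</version>
+    </dependency>
+    <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-log4j12</artifactId>
       <scope>test</scope>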
Index: jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ClassFileTextExtractor.java
===================================================================
--- jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ClassFileTextExtractor.java (revision 0)
+++ jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ClassFileTextExtractor.java (revision 0)
@@ -0,0 +1,352 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.extractor;
+
+import java.io.CharArrayReader;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.objectweb.asm.AnnotationVisitor;
+import org.objectweb.asm.Attribute;
+import org.objectweb.asm.ClassReader;
+import org.objectweb.asm.ClassVisitor;
+import org.objectweb.asm.FieldVisitor;
+import org.objectweb.asm.MethodVisitor;
+import org.objectweb.asm.Opcodes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Text extractor for Java class files. This class extracts class names,
+ * method signatures and field names from Java class files.
+ *
+ * It can handle any class file version that the ASM library it is built against can read.
+ */
+public class ClassFileTextExtractor extends AbstractTextExtractor {
+ /**
+ * Logger instance.
+ */
+ private static final Logger logger =
+ LoggerFactory.getLogger(ClassFileTextExtractor.class);
+
+ /**
+ * Creates a new ClassFileTextExtractor instance registered for the "application/java" content type.
+ */
+ public ClassFileTextExtractor() {
+ super(new String[]{"application/java"});
+ }
+
+ /**
+  * Returns a reader for the text content of the given java class.
+  * Returns an empty reader if the java class file could not be parsed.
+  *
+  * @param stream java class file; always closed before this method returns
+  * @param type ignored
+  * @param encoding ignored
+  * @return reader for the text content of the given java class file,
+  *         or an empty reader if the class file could not be parsed
+  * @throws IOException if the java class stream can not be read or closed
+  */
+ public Reader extractText(InputStream stream, String type, String encoding) throws IOException {
+     try {
+         ClassReader cr = new ClassReader(stream);
+         ExtractorVisitor visitor = new ExtractorVisitor();
+         cr.accept(visitor, ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE);
+         return new CharArrayReader(visitor.getData());
+     } catch (UndeclaredThrowableException ute) {
+         // Log the cause as the throwable: with no "{}" placeholder a plain string argument is dropped.
+         logger.warn("Failed to extract java class text content", ute.getUndeclaredThrowable());
+         return new StringReader("");
+     } finally {
+         stream.close();
+     }
+ }
+
+ static class ExtractorVisitor implements ClassVisitor {
+
+ private static final String sep = System.getProperty("line.separator");
+ // Compiled once and shared: matches one parameter descriptor ('[' prefixes, then a primitive code or "L...;").
+ private static final Pattern parmPattern = Pattern.compile("(\\[*(?:I|J|F|D|S|C|Z|B|(?:L[^;]+;)))");
+ private String packageName; // package of the visited class; stays null for the default package
+ private String className; // simple name of the visited class, set by visit()
+ private Set imports; // sorted package names referenced by the class
+ private CharArrayWriter classWriter; // collects the class declaration, fields and methods
+
+ public ExtractorVisitor() {
+     packageName = null;
+     imports = new TreeSet();
+     classWriter = new CharArrayWriter();
+ }
+
+ // Returns the package/import header, one line separator, then the collected class text.
+ public char[] getData() throws IOException {
+     CharArrayWriter prefixWriter = new CharArrayWriter();
+     if (packageName != null) {
+         prefixWriter.write("package ");
+         prefixWriter.write(packageName);
+         prefixWriter.write(";");
+         prefixWriter.write(sep);
+         // Only remove inside the null check: TreeSet.remove(null) throws
+         // NullPointerException, which broke classes in the default package.
+         imports.remove(packageName);
+     }
+     // java.lang never needs an import statement.
+     imports.remove("java.lang");
+     Iterator it = imports.iterator();
+     while (it.hasNext()) {
+         String importName = (String) it.next();
+         prefixWriter.write("import ");
+         prefixWriter.write(importName);
+         prefixWriter.write(".*;");
+         prefixWriter.write(sep);
+     }
+     // Result layout: prefix, one line separator, class text.
+     int prefixLen = prefixWriter.size();
+     int classLen = classWriter.size();
+     char[] buffer = new char[prefixLen + sep.length() + classLen];
+     System.arraycopy(prefixWriter.toCharArray(), 0, buffer, 0, prefixLen);
+     System.arraycopy(sep.toCharArray(), 0, buffer, prefixLen, sep.length());
+     System.arraycopy(classWriter.toCharArray(), 0, buffer, prefixLen + sep.length(), classLen);
+     return buffer;
+ }
+
+ // Records the package and simple class name, then writes the class declaration line.
+ public void visit(int version, int access, String name, String signature, String superName, String[] interfaces) {
+     try {
+         className = name.replace('/', '.');
+         int dotPos = className.lastIndexOf('.');
+         if (dotPos >= 0) {
+             packageName = className.substring(0, dotPos);
+             className = className.substring(dotPos + 1);
+         }
+         writeAccess(access);
+         classWriter.write("class " + className);
+         if (superName != null) { // ASM passes null only for java.lang.Object
+             classWriter.write(" extends " + parseClassName(superName));
+         }
+         if ((interfaces != null) && (interfaces.length > 0)) {
+             String comma = " implements ";
+             for (int i = 0; i < interfaces.length; i++) {
+                 classWriter.write(comma);
+                 // Interfaces are internal names (a/b/C), not descriptors (La/b/C;), so use parseClassName.
+                 classWriter.write(parseClassName(interfaces[i]));
+                 comma = ", ";
+             }
+         }
+         classWriter.write(" {" + sep);
+     } catch (IOException ioe) {
+         throw new UndeclaredThrowableException(ioe, "Failed visiting class information for " + name);
+     }
+ }
+
+ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { // writes "@", the descriptor and a space
+ try {
+ classWriter.write("@");
+ classWriter.write(desc); // the descriptor is written as received, without conversion
+ classWriter.write(" ");
+ return null; // no visitor is supplied for the annotation's values
+ } catch (IOException ioe) {
+ throw new UndeclaredThrowableException(ioe, "Failed visiting annotation for " + desc);
+ }
+ }
+
+ public void visitAttribute(Attribute attr) {
+ // Intentionally empty: attributes add nothing to the extracted text.
+ }
+
+ public void visitEnd() { // closes the class body that visit() opened with " {"
+ try {
+ classWriter.write("}");
+ } catch (IOException ioe) {
+ throw new UndeclaredThrowableException(ioe, "Failed visiting end");
+ }
+ }
+
+ public FieldVisitor visitField(int access, String name, String desc, String signature, Object value) { // writes one field declaration line
+ try {
+ if ((access & Opcodes.ACC_SYNTHETIC) == 0) { // synthetic fields are left out
+ writeAccess(access);
+ classWriter.write(parseTypeSignature(desc));
+ classWriter.write(" ");
+ classWriter.write(name);
+ // A non-null value is appended as " = " + value.toString(), with no quoting.
+ if (value != null) {
+ classWriter.write(" = ");
+ classWriter.write(value.toString());
+ }
+ classWriter.write(";");
+ classWriter.write(sep);
+ }
+ return null; // no field visitor is supplied
+ } catch (IOException ioe) {
+ throw new UndeclaredThrowableException(ioe, "Failed visiting Field " + name);
+ }
+ }
+
+ public void visitInnerClass(String name, String outerName, String innerName, int access) {
+ // Intentionally empty: inner class entries add nothing to the extracted text.
+ }
+
+ // Writes a method declaration with an empty body; synthetic methods are skipped.
+ public MethodVisitor visitMethod(int access, String name, String desc, String signature, String[] exceptions) {
+     try {
+         if ((access & Opcodes.ACC_SYNTHETIC) == 0) {
+             writeAccess(access);
+             classWriter.write(parseMethodSignature(name, desc));
+             if ((exceptions != null) && (exceptions.length > 0)) {
+                 String comma = " throws ";
+                 for (int i = 0; i < exceptions.length; i++) {
+                     classWriter.write(comma);
+                     classWriter.write(parseClassName(exceptions[i]));
+                     comma = ", ";
+                 }
+             }
+             classWriter.write(" {" + sep);
+             classWriter.write("}" + sep);
+         }
+         return null; // no method visitor is supplied
+     } catch (IOException ioe) {
+         // The message used to say "Field", copied from visitField.
+         throw new UndeclaredThrowableException(ioe, "Failed visiting Method " + name);
+     }
+ }
+
+ public void visitOuterClass(String arg0, String arg1, String arg2) {
+ // Intentionally empty: the enclosing class adds nothing to the extracted text.
+ }
+
+ public void visitSource(String source, String debug) {
+ // Intentionally empty: source file and debug information are not extracted.
+ }
+
+ private void writeAccess(int access) throws IOException { // writes the visibility keyword (if any), then "static " if set
+ if ((access & Opcodes.ACC_PUBLIC) != 0)
+ classWriter.write("public ");
+ else if ((access & Opcodes.ACC_PRIVATE) != 0)
+ classWriter.write("private ");
+ else if ((access & Opcodes.ACC_PROTECTED) != 0)
+ classWriter.write("protected "); // when none of the three flags is set, no visibility keyword is written
+ // static is checked separately from visibility; no other access flags are written.
+ if ((access & Opcodes.ACC_STATIC) != 0)
+ classWriter.write("static ");
+ }
+
+ // Converts a JVM type descriptor ("I", "[Ljava/lang/String;") to a simple Java type name; object packages go into imports.
+ private String parseTypeSignature(String typeSig) {
+     int dims = 0;
+     int pos = 0;
+     while (typeSig.charAt(pos) == '[') { // each leading '[' is one array dimension
+         pos++;
+         dims++;
+     }
+     String typeName;
+     char firstTypeChar = typeSig.charAt(pos);
+     if (firstTypeChar == 'L') { // object type: strip the leading 'L' and the trailing ';'
+         typeName = typeSig.substring(pos + 1, typeSig.length() - 1).replace('/', '.');
+         int lastDotPos = typeName.lastIndexOf('.');
+         if (lastDotPos >= 0) {
+             imports.add(typeName.substring(0, lastDotPos));
+             typeName = typeName.substring(lastDotPos + 1);
+         }
+     } else {
+         switch (firstTypeChar) {
+             case 'I':
+                 typeName = "int";
+                 break;
+             case 'J':
+                 typeName = "long";
+                 break;
+             case 'F':
+                 typeName = "float";
+                 break;
+             case 'D':
+                 typeName = "double";
+                 break;
+             case 'B':
+                 typeName = "byte";
+                 break;
+             case 'Z':
+                 typeName = "boolean";
+                 break;
+             case 'S':
+                 typeName = "short";
+                 break;
+             case 'C': // this case was missing, so char was reported as "void"
+                 typeName = "char";
+                 break;
+             default: // 'V' and anything unrecognised
+                 typeName = "void";
+                 break;
+         }
+     }
+     for (int i = 0; i < dims; i++) {
+         typeName += "[]";
+     }
+     return typeName;
+ }
+
+ private String parseClassName(String clsName) { // returns the simple name for a slash- or dot-separated class name
+ clsName = clsName.replace('/', '.');
+ int dotPos = clsName.lastIndexOf('.');
+ if (dotPos >= 0) {
+ imports.add(clsName.substring(0, dotPos)); // record the package so getData() can emit an import for it
+ clsName = clsName.substring(dotPos+1);
+ }
+ return clsName;
+ }
+
+ // Builds "returnType name(paramType, ...)" from a method name and its JVM method descriptor.
+ private String parseMethodSignature(String methodName, String methodSig) {
+     StringBuffer methodDetails = new StringBuffer(32);
+
+     // A descriptor has the form "(params)return"; split it at the closing parenthesis.
+     int rparenPos = methodSig.indexOf(')');
+     String parms = methodSig.substring(1, rparenPos);
+     String returnType = methodSig.substring(rparenPos + 1);
+     methodDetails.append(parseTypeSignature(returnType));
+     methodDetails.append(" ");
+     // Constructors are named "<init>" in class files; show the class name instead.
+     if ("<init>".equals(methodName)) {
+         methodDetails.append(className);
+     } else {
+         methodDetails.append(methodName);
+     }
+     methodDetails.append("(");
+
+     // Each regex match is one complete parameter descriptor.
+     Matcher m = parmPattern.matcher(parms);
+     String comma = "";
+     while (m.find()) {
+         methodDetails.append(comma);
+         methodDetails.append(parseTypeSignature(m.group()));
+         comma = ", ";
+     }
+
+     methodDetails.append(")");
+     return methodDetails.toString();
+ }
+ }
+}
Index: jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.java
===================================================================
--- jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.java (revision 0)
+++ jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.java (revision 0)
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the {@link ClassFileTextExtractor} class.
+ */
+public class ClassFileTextExtractorTest extends TestCase {
+ /**
+ * Java Class extractor being tested; a new instance is created in setUp().
+ */
+ private ClassFileTextExtractor extractor;
+
+ /**
+ * Creates the java class extractor to be tested.
+ */
+ protected void setUp() throws Exception {
+ super.setUp();
+ extractor = new ClassFileTextExtractor();
+ }
+
+ /**
+ * Tests that the extractor supports application/java and no other content type.
+ */
+ public void testContentTypes() {
+ Set types = new HashSet();
+ types.addAll(Arrays.asList(extractor.getContentTypes()));
+ assertTrue(
+ "ClassFileTextExtractor does not support application/java",
+ types.contains("application/java"));
+ assertEquals(
+ "ClassFileTextExtractor supports unknown content types",
+ 1, types.size()); // exactly one type, so nothing besides application/java
+ }
+
+ /**
+ * Tests that the extractor produces the expected text for this test's own compiled class file.
+ *
+ * @throws IOException on IO errors
+ */
+ public void testNormalStream() throws IOException {
+ InputStream is = ClassFileTextExtractor.class.getResourceAsStream("/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.class");
+ Reader reader = extractor.extractText(is, "application/java", null);
+ String sep = System.getProperty("line.separator"); // the extractor separates lines with this same property
+ String expectedResult = "package org.apache.jackrabbit.extractor;" + sep +
+ "import java.io.*;" + sep +
+ "import java.util.*;" + sep +
+ "import junit.framework.*;" + sep +
+ sep + // separator line between the import header and the class text
+ "public class ClassFileTextExtractorTest extends TestCase {" + sep +
+ "private ClassFileTextExtractor extractor;" + sep +
+ "public void ClassFileTextExtractorTest() {" + sep + // the constructor appears under the class name with a void return type
+ "}" + sep +
+ "protected void setUp() throws Exception {" + sep +
+ "}" + sep +
+ "public void testContentTypes() {" + sep +
+ "}" + sep +
+ "public void testNormalStream() throws IOException {" + sep +
+ "}" + sep +
+ "protected boolean sampleMethod(int, String, double[], Set[]) {" + sep +
+ "}" + sep +
+ "}";
+ assertEquals(expectedResult, ExtractorHelper.read(reader));
+ }
+
+ /**
+ * A sample method whose primitive, object and array parameters appear in the expected output of testNormalStream.
+ */
+ protected boolean sampleMethod(int i, String s, double[] d, Set[] ss) {
+ return true;
+ }
+
+
+}