Index: jackrabbit-text-extractors/pom.xml
===================================================================
--- jackrabbit-text-extractors/pom.xml (revision 645102)
+++ jackrabbit-text-extractors/pom.xml (working copy)
@@ -79,6 +79,11 @@
       <artifactId>slf4j-api</artifactId>
     </dependency>
     <dependency>
+      <groupId>asm</groupId>
+      <artifactId>asm</artifactId>
+      <version>3.1</version>
+    </dependency>
+    <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-log4j12</artifactId>
       <scope>test</scope>
Index: jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ClassFileTextExtractor.java
===================================================================
--- jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ClassFileTextExtractor.java (revision 0)
+++ jackrabbit-text-extractors/src/main/java/org/apache/jackrabbit/extractor/ClassFileTextExtractor.java (revision 0)
@@ -0,0 +1,352 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.extractor;
+
+import java.io.CharArrayReader;
+import java.io.CharArrayWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.objectweb.asm.AnnotationVisitor;
+import org.objectweb.asm.Attribute;
+import org.objectweb.asm.ClassReader;
+import org.objectweb.asm.ClassVisitor;
+import org.objectweb.asm.FieldVisitor;
+import org.objectweb.asm.MethodVisitor;
+import org.objectweb.asm.Opcodes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Text extractor for Java class files. This class extracts class names,
+ * method signatures and field names from java class files and renders
+ * them as a source-like skeleton.
+ * <p>
+ * Class files are parsed with the ASM <code>ClassReader</code>; method
+ * bodies and stack map frames are skipped.
+ */
+public class ClassFileTextExtractor extends AbstractTextExtractor {
+
+    /**
+     * Logger instance.
+     */
+    private static final Logger logger =
+        LoggerFactory.getLogger(ClassFileTextExtractor.class);
+
+    /**
+     * Creates a new ClassFileTextExtractor instance.
+     */
+    public ClassFileTextExtractor() {
+        super(new String[]{"application/java"});
+    }
+
+    /**
+     * Returns a reader for the text content of the given java class.
+     * Returns an empty reader if the java class file could not be parsed.
+     *
+     * @param stream java class file
+     * @param type ignored
+     * @param encoding ignored
+     * @return reader for the text content of the given java class file,
+     *         or an empty reader if the class file could not be parsed
+     * @throws IOException if the java class stream can not be read or closed
+     */
+    public Reader extractText(InputStream stream, String type, String encoding)
+            throws IOException {
+        try {
+            ClassReader cr = new ClassReader(stream);
+            ExtractorVisitor visitor = new ExtractorVisitor();
+            // Only declarations are extracted, so bytecode and frames are skipped
+            cr.accept(visitor, ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE);
+            return new CharArrayReader(visitor.getData());
+        } catch (RuntimeException e) {
+            // Covers both the UndeclaredThrowableException raised by the visitor
+            // and any other unchecked exception raised while parsing; the
+            // exception is passed to the logger so that its cause is reported.
+            logger.warn("Failed to extract java class text content", e);
+            return new StringReader("");
+        } finally {
+            if (stream != null) {
+                stream.close();
+            }
+        }
+    }
+
+    /**
+     * ASM class visitor that collects the package, the referenced packages
+     * (as imports) and the class, field and method declarations of the
+     * visited class.
+     */
+    static class ExtractorVisitor implements ClassVisitor {
+
+        /** Line separator written after each declaration. */
+        private static final String sep = System.getProperty("line.separator");
+
+        /**
+         * Matches a single parameter type descriptor: any number of array
+         * dimensions followed by a primitive type code or an object type.
+         */
+        private static final Pattern PARM_PATTERN =
+            Pattern.compile("(\\[*(?:I|J|F|D|S|C|Z|B|(?:L[^;]+;)))");
+
+        /** Package of the visited class, or null for the default package. */
+        private String packageName;
+
+        /** Simple name of the visited class. */
+        private String className;
+
+        /** Sorted names of the packages referenced by the visited class. */
+        private Set imports;
+
+        /** Collects the class declaration and its members. */
+        private CharArrayWriter classWriter;
+
+        public ExtractorVisitor() {
+            packageName = null;
+            imports = new TreeSet();
+            classWriter = new CharArrayWriter();
+        }
+
+        /**
+         * Returns the extracted text: the package declaration, the imports,
+         * an empty line and the class declaration with its members.
+         *
+         * @return extracted text
+         * @throws IOException if the text can not be assembled
+         */
+        public char[] getData() throws IOException {
+            CharArrayWriter out = new CharArrayWriter();
+            if (packageName != null) {
+                out.write("package ");
+                out.write(packageName);
+                out.write(";");
+                out.write(sep);
+                // Types of the same package need no import. This must stay
+                // inside the null check: a naturally ordered TreeSet does
+                // not accept null, which is what a default-package class
+                // would pass here.
+                imports.remove(packageName);
+            }
+            imports.remove("java.lang");
+
+            Iterator it = imports.iterator();
+            while (it.hasNext()) {
+                out.write("import ");
+                out.write((String) it.next());
+                out.write(".*;");
+                out.write(sep);
+            }
+
+            out.write(sep);
+            classWriter.writeTo(out);
+            return out.toCharArray();
+        }
+
+        /**
+         * Writes the class declaration and records the package of the class.
+         */
+        public void visit(int version, int access, String name, String signature, String superName, String[] interfaces) {
+            try {
+                className = name.replace('/', '.');
+                int dotPos = className.lastIndexOf('.');
+                if (dotPos >= 0) {
+                    packageName = className.substring(0, dotPos);
+                    className = className.substring(dotPos + 1);
+                }
+                writeAccess(access);
+                classWriter.write("class ");
+                classWriter.write(className);
+                // ASM passes a null super name when visiting java.lang.Object
+                if (superName != null) {
+                    classWriter.write(" extends ");
+                    classWriter.write(parseClassName(superName));
+                }
+                if ((interfaces != null) && (interfaces.length > 0)) {
+                    String separator = " implements ";
+                    for (int i = 0; i < interfaces.length; i++) {
+                        classWriter.write(separator);
+                        // Interfaces are given as internal names, not as
+                        // type descriptors
+                        classWriter.write(parseClassName(interfaces[i]));
+                        separator = ", ";
+                    }
+                }
+                classWriter.write(" {");
+                classWriter.write(sep);
+            } catch (IOException ioe) {
+                throw new UndeclaredThrowableException(ioe, "Failed visiting class information for " + name);
+            }
+        }
+
+        /**
+         * Writes the simple name of the annotation type. Returns null, so
+         * the annotation values are not visited.
+         */
+        public AnnotationVisitor visitAnnotation(String desc, boolean visible) {
+            try {
+                classWriter.write("@");
+                // desc is a type descriptor, so render it like a field type
+                classWriter.write(parseTypeSignature(desc));
+                classWriter.write(" ");
+                return null;
+            } catch (IOException ioe) {
+                throw new UndeclaredThrowableException(ioe, "Failed visiting annotation for " + desc);
+            }
+        }
+
+        public void visitAttribute(Attribute attr) {
+            //do nothing
+        }
+
+        /**
+         * Closes the class declaration.
+         */
+        public void visitEnd() {
+            try {
+                classWriter.write("}");
+            } catch (IOException ioe) {
+                throw new UndeclaredThrowableException(ioe, "Failed visiting end");
+            }
+        }
+
+        /**
+         * Writes the declaration of a non-synthetic field, including its
+         * constant value if there is one.
+         */
+        public FieldVisitor visitField(int access, String name, String desc, String signature, Object value) {
+            try {
+                if ((access & Opcodes.ACC_SYNTHETIC) == 0) {
+                    writeAccess(access);
+                    classWriter.write(parseTypeSignature(desc));
+                    classWriter.write(" ");
+                    classWriter.write(name);
+
+                    if (value != null) {
+                        classWriter.write(" = ");
+                        classWriter.write(value.toString());
+                    }
+                    classWriter.write(";");
+                    classWriter.write(sep);
+                }
+                return null;
+            } catch (IOException ioe) {
+                throw new UndeclaredThrowableException(ioe, "Failed visiting Field " + name);
+            }
+        }
+
+        public void visitInnerClass(String name, String outerName, String innerName, int access) {
+            //do nothing
+        }
+
+        /**
+         * Writes the signature of a non-synthetic method with an empty body.
+         */
+        public MethodVisitor visitMethod(int access, String name, String desc, String signature, String[] exceptions) {
+            try {
+                if ((access & Opcodes.ACC_SYNTHETIC) == 0) {
+                    writeAccess(access);
+                    classWriter.write(parseMethodSignature(name, desc));
+                    if ((exceptions != null) && (exceptions.length > 0)) {
+                        String separator = " throws ";
+                        for (int i = 0; i < exceptions.length; i++) {
+                            classWriter.write(separator);
+                            classWriter.write(parseClassName(exceptions[i]));
+                            separator = ", ";
+                        }
+                    }
+                    classWriter.write(" {");
+                    classWriter.write(sep);
+                    classWriter.write("}");
+                    classWriter.write(sep);
+                }
+                return null;
+            } catch (IOException ioe) {
+                throw new UndeclaredThrowableException(ioe, "Failed visiting Method " + name);
+            }
+        }
+
+        public void visitOuterClass(String arg0, String arg1, String arg2) {
+            //do nothing
+        }
+
+        public void visitSource(String source, String debug) {
+            //do nothing
+        }
+
+        /**
+         * Writes the visibility and static modifiers found in the flags.
+         */
+        private void writeAccess(int access) throws IOException {
+            if ((access & Opcodes.ACC_PUBLIC) != 0) {
+                classWriter.write("public ");
+            } else if ((access & Opcodes.ACC_PRIVATE) != 0) {
+                classWriter.write("private ");
+            } else if ((access & Opcodes.ACC_PROTECTED) != 0) {
+                classWriter.write("protected ");
+            }
+
+            if ((access & Opcodes.ACC_STATIC) != 0) {
+                classWriter.write("static ");
+            }
+        }
+
+        /**
+         * Converts a type descriptor to a simple type name and records the
+         * package of an object type as an import.
+         */
+        private String parseTypeSignature(String typeSig) {
+            // Each leading '[' is one array dimension
+            int dims = 0;
+            while (typeSig.charAt(dims) == '[') {
+                dims++;
+            }
+
+            StringBuffer typeName = new StringBuffer();
+            switch (typeSig.charAt(dims)) {
+            case 'L':
+                // Object type: strip the leading 'L' and the trailing ';'
+                typeName.append(parseClassName(
+                        typeSig.substring(dims + 1, typeSig.length() - 1)));
+                break;
+            case 'I':
+                typeName.append("int");
+                break;
+            case 'J':
+                typeName.append("long");
+                break;
+            case 'F':
+                typeName.append("float");
+                break;
+            case 'D':
+                typeName.append("double");
+                break;
+            case 'B':
+                typeName.append("byte");
+                break;
+            case 'C':
+                typeName.append("char");
+                break;
+            case 'Z':
+                typeName.append("boolean");
+                break;
+            case 'S':
+                typeName.append("short");
+                break;
+            default:
+                // 'V' and anything unrecognized
+                typeName.append("void");
+                break;
+            }
+
+            for (int i = 0; i < dims; i++) {
+                typeName.append("[]");
+            }
+            return typeName.toString();
+        }
+
+        /**
+         * Converts an internal class name (slash separated) to a simple
+         * class name and records its package as an import.
+         */
+        private String parseClassName(String clsName) {
+            clsName = clsName.replace('/', '.');
+            int dotPos = clsName.lastIndexOf('.');
+            if (dotPos >= 0) {
+                imports.add(clsName.substring(0, dotPos));
+                clsName = clsName.substring(dotPos + 1);
+            }
+            return clsName;
+        }
+
+        /**
+         * Renders a method as "returnType name(paramType, ...)" from its
+         * name and its descriptor.
+         */
+        private String parseMethodSignature(String methodName, String methodSig) {
+            StringBuffer methodDetails = new StringBuffer(32);
+
+            // Descriptor layout: '(' parameter descriptors ')' return descriptor
+            int rparenPos = methodSig.indexOf(')');
+            String parms = methodSig.substring(1, rparenPos);
+            String returnType = methodSig.substring(rparenPos + 1);
+            methodDetails.append(parseTypeSignature(returnType));
+            methodDetails.append(" ");
+            // Constructors are named <init> in class files; show the class
+            // name instead
+            if ("<init>".equals(methodName)) {
+                methodDetails.append(className);
+            } else {
+                methodDetails.append(methodName);
+            }
+            methodDetails.append("(");
+
+            Matcher m = PARM_PATTERN.matcher(parms);
+            String separator = "";
+            while (m.find()) {
+                methodDetails.append(separator);
+                methodDetails.append(parseTypeSignature(m.group(1)));
+                separator = ", ";
+            }
+
+            methodDetails.append(")");
+            return methodDetails.toString();
+        }
+    }
+}
Index: jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.java
===================================================================
--- jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.java (revision 0)
+++ jackrabbit-text-extractors/src/test/java/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.java (revision 0)
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the {@link ClassFileTextExtractor} class.
+ */
+public class ClassFileTextExtractorTest extends TestCase {
+
+    /**
+     * Java Class extractor being tested.
+     */
+    private ClassFileTextExtractor extractor;
+
+    /**
+     * Creates the java class extractor to be tested.
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+        extractor = new ClassFileTextExtractor();
+    }
+
+    /**
+     * Tests that the extractor supports application/java
+     * and no other content type.
+     */
+    public void testContentTypes() {
+        Set types = new HashSet();
+        types.addAll(Arrays.asList(extractor.getContentTypes()));
+        assertTrue(
+            "ClassFileTextExtractor does not support application/java",
+            types.contains("application/java"));
+        assertEquals(
+            "ClassFileTextExtractor supports unknown content types",
+            1, types.size());
+    }
+
+    /**
+     * Tests that the extractor correctly handles a normal stream,
+     * using the compiled form of this test class as input. The
+     * expected text therefore mirrors the members declared here.
+     *
+     * @throws IOException on IO errors
+     */
+    public void testNormalStream() throws IOException {
+        String resource =
+            "/org/apache/jackrabbit/extractor/ClassFileTextExtractorTest.class";
+        InputStream is = ClassFileTextExtractor.class.getResourceAsStream(resource);
+        // Fail with a clear message rather than deep inside the extractor
+        assertNotNull("Test resource not found: " + resource, is);
+        Reader reader = extractor.extractText(is, "application/java", null);
+        String sep = System.getProperty("line.separator");
+        String expectedResult = "package org.apache.jackrabbit.extractor;" + sep
+            + "import java.io.*;" + sep
+            + "import java.util.*;" + sep
+            + "import junit.framework.*;" + sep
+            + sep
+            + "public class ClassFileTextExtractorTest extends TestCase {" + sep
+            + "private ClassFileTextExtractor extractor;" + sep
+            + "public void ClassFileTextExtractorTest() {" + sep
+            + "}" + sep
+            + "protected void setUp() throws Exception {" + sep
+            + "}" + sep
+            + "public void testContentTypes() {" + sep
+            + "}" + sep
+            + "public void testNormalStream() throws IOException {" + sep
+            + "}" + sep
+            + "protected boolean sampleMethod(int, String, double[], Set[]) {" + sep
+            + "}" + sep
+            + "}";
+        assertEquals(expectedResult, ExtractorHelper.read(reader));
+    }
+
+    /**
+     * A sample method just to make sure that the parser is working:
+     * its signature mixes a primitive, an object, a primitive array
+     * and an object array parameter.
+     */
+    protected boolean sampleMethod(int i, String s, double[] d, Set[] ss) {
+        return true;
+    }
+}