From 4dfa8fd55463206e1e94358745ebdac69af7c412 Mon Sep 17 00:00:00 2001
From: Jukka Zitting <jukka@apache.org>
Date: Fri, 8 Oct 2010 18:13:30 +0200
Subject: [PATCH 2/2] JCR-2762: Optimize bundle serialization

Since practically all of the names in the standard JCR namespaces are already included in BundleNames, we can use the namespace index field more efficiently to avoid duplicate serialization of most custom namespace URIs.
---
 .../core/persistence/util/BundleNames.java         |   55 ++------------------
 .../core/persistence/util/BundleReader.java        |   16 +++++-
 .../core/persistence/util/BundleWriter.java        |   36 ++++++++++---
 .../core/persistence/util/BundleBindingTest.java   |   30 +++++++++++
 4 files changed, 77 insertions(+), 60 deletions(-)

diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java
index 2d4d7e7..d04c164 100644
--- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java
+++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java
@@ -34,9 +34,9 @@ import org.apache.jackrabbit.spi.commons.name.NameConstants;
 
 
 /**
- * Static collection of common JCR names and namespaces. This class is
- * used by the {@link BundleWriter} and {@link BundleReader} classes to
- * optimize the serialization of names used in bundles.
+ * Static collection of common JCR names. This class is used by the
+ * {@link BundleWriter} and {@link BundleReader} classes to optimize the
+ * serialization of names used in bundles.
  */
 class BundleNames {
 
@@ -163,33 +163,11 @@ class BundleNames {
     private static final Map<Name, Integer> NAME_MAP =
         new HashMap<Name, Integer>();
 
-    /**
-     * Static list of standard namespaces.
-     */
-    private static final String[] NAMESPACE_ARRAY = {
-        // WARNING: Only edit if you really know what you're doing!
-        Name.NS_DEFAULT_URI,
-        Name.NS_JCR_URI,
-        Name.NS_MIX_URI,
-        Name.NS_NT_URI,
-        Name.NS_XMLNS_URI,
-        Name.NS_XML_URI,
-        Name.NS_REP_URI
-    };  // WARNING: Only edit if you really know what you're doing!
-
-    private static final Map<String, Integer> NAMESPACE_MAP =
-        new HashMap<String, Integer>();
-
     static {
-        assert NAME_ARRAY.length < 0x80;
+        assert NAME_ARRAY.length <= 0x80;
         for (int i = 0; i < NAME_ARRAY.length; i++) {
             NAME_MAP.put(NAME_ARRAY[i], i);
         }
-
-        assert NAMESPACE_ARRAY.length < 0x08;
-        for (int i = 0; i < NAMESPACE_ARRAY.length; i++) {
-            NAMESPACE_MAP.put(NAMESPACE_ARRAY[i], i);
-        }
     }
 
     /**
@@ -217,29 +195,4 @@ class BundleNames {
         }
     }
 
-    /**
-     * Returns the three-bit index (0-6) of a common JCR namespace,
-     * or -1 if the given namespace URI is not known.
-     *
-     * @param uri namespace URI
-     * @return three-bit index of the namespace, or -1
-     */
-    public static int namespaceToIndex(String uri) {
-        Integer index = NAMESPACE_MAP.get(uri);
-        if (index != null) {
-            return index;
-        } else {
-            return -1;
-        }
-    }
-
-    public static String indexToNamespace(int index) throws IOException {
-        try {
-            return NAMESPACE_ARRAY[index];
-        } catch (ArrayIndexOutOfBoundsException e) {
-            throw new IOExceptionWithCause(
-                    "Invalid common JCR namespace index: " + index, e);
-        }
-    }
-
 }
diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
index c89696e..9ec12f6 100644
--- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
+++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
@@ -51,6 +51,15 @@ class BundleReader {
     private final int version;
 
     /**
+     * The default namespace and the first six other namespaces used in this
+     * bundle. Used by the {@link #readName()} method to keep track of
+     * already seen namespaces.
+     */
+    private final String[] namespaces =
+        // NOTE: The length of this array must be seven
+        { Name.NS_DEFAULT_URI, null, null, null, null, null, null };
+
+    /**
      * Creates a new bundle deserializer.
      *
      * @param binding bundle binding
@@ -324,10 +333,13 @@ class BundleReader {
         } else {
             String uri;
             int ns = (b >> 4) & 0x07;
-            if (ns != 0x07) {
-                uri = BundleNames.indexToNamespace(ns);
+            if (ns < namespaces.length && namespaces[ns] != null) {
+                uri = namespaces[ns];
             } else {
                 uri = in.readUTF();
+                if (ns < namespaces.length) {
+                    namespaces[ns] = uri;
+                }
             }
 
             String local;
diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
index f9d0dae..0f37ef7 100644
--- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
+++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
@@ -50,6 +50,15 @@ class BundleWriter {
     private final DataOutputStream out;
 
     /**
+     * The default namespace and the first six other namespaces used in this
+     * bundle. Used by the {@link #writeName(Name)} method to keep track of
+     * already seen namespaces.
+     */
+    private final String[] namespaces =
+        // NOTE: The length of this array must be seven
+        { Name.NS_DEFAULT_URI, null, null, null, null, null, null };
+
+    /**
      * Creates a new bundle serializer.
      *
      * @param binding bundle binding
@@ -58,6 +67,7 @@ class BundleWriter {
      */
     public BundleWriter(BundleBinding binding, OutputStream stream)
             throws IOException {
+        assert namespaces.length == 7;
         this.binding = binding;
         this.out = new DataOutputStream(stream);
         this.out.writeByte(BundleBinding.VERSION_CURRENT);
@@ -363,11 +373,15 @@ class BundleWriter {
      * +-------------------------------+
      * </pre>
      * <p>
-     * The three-bit namespace index identifies either a known namespace
-     * in the {@link BundleNames} class (values 0 - 6) or an explicit
-     * namespace URI string that is written using
-     * {@link DataOutputStream#writeUTF(String)} right after this byte
-     * (value 7).
+     * The three-bit namespace index identifies the namespace of the name.
+     * The serializer keeps track of the default namespace (value 0) and at
+     * most six other namespaces (values 1-6), in the order they appear
+     * in the bundle. When one of these six custom namespaces first appears
+     * in the bundle, then the namespace URI is written using
+     * {@link DataOutputStream#writeUTF(String)} right after this byte.
+     * Later uses of such a namespace simply refer back to the already written
+     * namespace URI string. Any other namespaces are identified with value 7
+     * and their URIs are always written to the bundle after this byte.
      * <p>
      * The four-bit name length field indicates the length (in UTF-8 bytes)
      * of the local part of the name. Since zero-length local names are not
@@ -388,15 +402,23 @@ class BundleWriter {
             out.writeByte(index);
         } else {
             String uri = name.getNamespaceURI();
-            int ns = BundleNames.namespaceToIndex(uri) & 0x07;
+            int ns = 0;
+            while (ns < namespaces.length
+                    && namespaces[ns] != null
+                    && !namespaces[ns].equals(uri)) {
+                ns++;
+            }
 
             String local = name.getLocalName();
             byte[] bytes = local.getBytes("UTF-8");
             int len = Math.min(bytes.length - 1, 0x0f);
 
             out.writeByte(0x80 | ns << 4 | len);
-            if (ns == 0x07) {
+            if (ns == namespaces.length || namespaces[ns] == null) {
                 out.writeUTF(uri);
+                if (ns < namespaces.length) {
+                    namespaces[ns] = uri;
+                }
             }
             if (len != 0x0f) {
                 out.write(bytes);
diff --git a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java
index 13ab90b..9fb251b 100644
--- a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java
+++ b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java
@@ -281,6 +281,36 @@ public class BundleBindingTest extends TestCase {
                 10, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 0 });
     }
 
+    /**
+     * Tests serialization of custom namespaces.
+     */
+    public void testCustomNamespaces() throws Exception {
+        NodePropBundle bundle = new NodePropBundle(new NodeId());
+        bundle.setParentId(new NodeId());
+        bundle.setNodeTypeName(NameConstants.NT_UNSTRUCTURED);
+        bundle.setMixinTypeNames(Collections.<Name>emptySet());
+        bundle.setSharedSet(Collections.<NodeId>emptySet());
+
+        NameFactory factory = NameFactoryImpl.getInstance();
+        bundle.addChildNodeEntry(factory.create("ns1", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns2", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns3", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns4", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns5", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns6", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns7", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns8", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns1", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns1", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns2", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns3", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns1", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns2", "test"), new NodeId());
+        bundle.addChildNodeEntry(factory.create("ns3", "test"), new NodeId());
+
+        assertBundleRoundtrip(bundle);
+    }
+
     private void assertBundleRoundtrip(NodePropBundle bundle)
             throws Exception {
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-- 
1.6.2.5

