### Eclipse Workspace Patch 1.0 #P jackrabbit-core-r-1372412 Index: src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java (revision 1372412) +++ src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java (working copy) @@ -16,6 +16,8 @@ */ package org.apache.jackrabbit.core.persistence.util; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -23,11 +25,17 @@ import org.apache.jackrabbit.core.data.DataStore; import org.apache.jackrabbit.core.id.NodeId; import org.apache.jackrabbit.core.util.StringIndex; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This Class implements efficient serialization methods for item states. */ public class BundleBinding { + + private static boolean ALLOW_BROKEN_BUNDLES = Boolean.getBoolean("jackrabbit.allowBrokenBundleWrites"); + + private static Logger log = LoggerFactory.getLogger(BundleBinding.class); static final int BINARY_IN_BLOB_STORE = -1; @@ -161,7 +169,44 @@ */ public void writeBundle(OutputStream out, NodePropBundle bundle) throws IOException { - new BundleWriter(this, out).writeBundle(bundle); + byte[] data; + IOException lastError = null; + for (int i = 0; i < 5; i++) { + ByteArrayOutputStream buff = new ByteArrayOutputStream(); + if (i < 3) { + new BundleWriter(this, buff).writeBundle(bundle); + } else { + log.warn("Corrupt bundle: writing slower, i = " + i); + new BundleWriterSlower(this, buff).writeBundle(bundle); + } + data = buff.toByteArray(); + NodeId id = bundle.getId(); + try { + ByteArrayInputStream in = new ByteArrayInputStream(data); + new BundleReader(this, in).readBundleNormal(id); + } catch (IOException e) { + lastError = e; + log.warn("Corrupt bundle: could not read the bundle I wrote", e); + ByteArrayInputStream in = new ByteArrayInputStream(data); + try { + new BundleReader(this, in).readBundle(id); + log.warn("Corrupt bundle: can fix error while reading"); + if (i == 4 && ALLOW_BROKEN_BUNDLES) { + log.warn("Corrupt bundle: writing broken one"); + break; + } + } catch (IOException e2) { + lastError = e2; + log.warn("Corrupt bundle: could not read even when trying harder", e2); + } + continue; + } + out.write(data); + return; + } + String msg = "Corrupt bundle: could not write a correct bundle, giving up: " + bundle; + log.error(msg); + throw new IOException(msg, lastError); } } Index: src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriterSlower.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriterSlower.java (revision 0) +++ src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriterSlower.java (revision 0) @@ -0,0 +1,729 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.persistence.util; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.math.BigDecimal; +import java.util.Calendar; +import java.util.Collection; +import java.util.GregorianCalendar; + +import javax.jcr.PropertyType; +import javax.jcr.RepositoryException; + +import org.apache.commons.io.IOUtils; +import org.apache.jackrabbit.core.data.DataStore; +import org.apache.jackrabbit.core.id.NodeId; +import org.apache.jackrabbit.core.value.InternalValue; +import org.apache.jackrabbit.core.persistence.util.NodePropBundle.ChildNodeEntry; +import org.apache.jackrabbit.core.persistence.util.NodePropBundle.PropertyEntry; +import org.apache.jackrabbit.spi.Name; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Bundle slow but "careful" serializer. Careful specially for boolean properties. + * + * @see BundleReader + */ +class BundleWriterSlower { + + /** Logger instance */ + private static Logger log = LoggerFactory.getLogger(BundleWriterSlower.class); + + private final BundleBinding binding; + + private final DataOutputStream out; + + /** + * The default namespace and the first six other namespaces used in this + * bundle. Used by the {@link #writeName(Name)} method to keep track of + * already seen namespaces. + */ + private final String[] namespaces = + // NOTE: The length of this array must be seven + { Name.NS_DEFAULT_URI, null, null, null, null, null, null }; + + /** + * Creates a new bundle serializer. + * + * @param binding bundle binding + * @param stream stream to which the bundle will be written + * @throws IOException if an I/O error occurs. + */ + public BundleWriterSlower(BundleBinding binding, OutputStream stream) + throws IOException { + assert namespaces.length == 7; + this.binding = binding; + this.out = new DataOutputStream(stream); + this.out.writeByte(BundleBinding.VERSION_CURRENT); + } + + /** + * Serializes a NodePropBundle to a data output stream + * + * @param bundle the bundle to serialize + * @throws IOException if an I/O error occurs. + */ + public void writeBundle(NodePropBundle bundle) + throws IOException { + long size = out.size(); + + // primaryType + writeName(bundle.getNodeTypeName()); + + // parentUUID + NodeId parentId = bundle.getParentId(); + if (parentId == null) { + parentId = BundleBinding.NULL_PARENT_ID; + } + writeNodeId(parentId); + + // write mod count + writeVarInt(bundle.getModCount()); + + Collection mixins = bundle.getMixinTypeNames(); + Collection properties = bundle.getPropertyEntries(); + Collection nodes = bundle.getChildNodeEntries(); + Collection shared = bundle.getSharedSet(); + + int mn = mixins.size(); + int pn = properties.size(); + int nn = nodes.size(); + int sn = shared.size(); + int referenceable = 0; + if (bundle.isReferenceable()) { + referenceable = 1; + } + out.writeByte( + Math.min(mn, 1) << 7 + | Math.min(pn, 7) << 4 + | Math.min(nn, 3) << 2 + | Math.min(sn, 1) << 1 + | referenceable); + + // mixin types + writeVarInt(mn, 1); + for (Name name : mixins) { + writeName(name); + } + + // properties + writeVarInt(pn, 7); + for (PropertyEntry property : properties) { + writeState(property); + } + + // child nodes (list of name/uuid pairs) + writeVarInt(nn, 3); + for (ChildNodeEntry child : nodes) { + writeName(child.getName()); // name + writeNodeId(child.getId()); // uuid + } + + // write shared set + writeVarInt(sn, 1); + for (NodeId nodeId: shared) { + writeNodeId(nodeId); + } + + // set size of bundle + bundle.setSize(out.size() - size); + } + + /** + * Serializes a property entry. The serialization begins with the + * property name followed by a single byte that encodes the type and + * multi-valuedness of the property: + *
+     * +-------------------------------+
+     * |   mv count    |     type      |
+     * +-------------------------------+
+     * 
+ *

+ * The lower four bits encode the property type (0-12 in JCR 2.0) and + * higher bits indicate whether this is a multi-valued property and how + * many property values there are. A value of 0 is reserved for + * single-valued properties (that are guaranteed to always have just a + * single value), and all non-zero values indicate a multi-valued property. + *

+ * In multi-valued properties the exact value of the "mv count" field is + * the number of property values plus one and truncated at 15 (the highest + * four-bit value). If there are 14 or more (14 + 1 == 15) property values, + * then the number of additional values is serialized as a variable-length + * integer (see {@link #writeVarInt(int)}) right after this byte. + *

+ * The modification count of the property state is written next as a + * variable-length integer, followed by the serializations of all the + * values of this property. + * + * @param state the property entry to store + * @throws IOException if an I/O error occurs. + */ + private void writeState(NodePropBundle.PropertyEntry state) + throws IOException { + log.info("writing state for property " + state.getName()); + writeName(state.getName()); + + InternalValue[] values = state.getValues(); + + int type = state.getType(); + log.info("writing type " + type); + assert 0 <= type && type <= 0x0f; + if (state.isMultiValued()) { + log.info("writing multi-valued"); + int len = values.length + 1; + if (len < 0x0f) { + out.writeByte(len << 4 | type); + } else { + out.writeByte(0xf0 | type); + writeVarInt(len - 0x0f); + } + } else { + assert values.length == 1; + out.writeByte(type); + } + + writeVarInt(state.getModCount()); + + // values + for (int i = 0; i < values.length; i++) { + InternalValue val = values[i]; + switch (state.getType()) { + case PropertyType.BINARY: + try { + long size = val.getLength(); + DataStore dataStore = binding.dataStore; + if (dataStore != null) { + int maxMemorySize = dataStore.getMinRecordLength() - 1; + if (size < maxMemorySize) { + writeSmallBinary(val, state, i); + } else { + out.writeInt(BundleBinding.BINARY_IN_DATA_STORE); + val.store(dataStore); + writeString(val.toString()); + } + break; + } + // special handling required for binary value: + // spool binary value to file in blob store + if (size < 0) { + log.warn("Blob has negative size. Potential loss of data. " + + "id={} idx={}", state.getId(), String.valueOf(i)); + out.writeInt(0); + values[i] = InternalValue.create(new byte[0]); + val.discard(); + } else if (size > binding.getMinBlobSize()) { + out.writeInt(BundleBinding.BINARY_IN_BLOB_STORE); + String blobId = state.getBlobId(i); + if (blobId == null) { + BLOBStore blobStore = binding.getBlobStore(); + try { + InputStream in = val.getStream(); + try { + blobId = blobStore.createId(state.getId(), i); + blobStore.put(blobId, in, size); + state.setBlobId(blobId, i); + } finally { + IOUtils.closeQuietly(in); + } + } catch (Exception e) { + String msg = "Error while storing blob. id=" + + state.getId() + " idx=" + i + " size=" + size; + log.error(msg, e); + throw new IOException(msg); + } + try { + // replace value instance with value + // backed by resource in blob store and delete temp file + if (blobStore instanceof ResourceBasedBLOBStore) { + values[i] = InternalValue.create(((ResourceBasedBLOBStore) blobStore).getResource(blobId)); + } else { + values[i] = InternalValue.create(blobStore.get(blobId)); + } + } catch (Exception e) { + log.error("Error while reloading blob. truncating. id=" + + state.getId() + " idx=" + i + " size=" + size, e); + values[i] = InternalValue.create(new byte[0]); + } + val.discard(); + } + // store id of blob as property value + writeString(blobId); // value + } else { + // delete evt. blob + byte[] data = writeSmallBinary(val, state, i); + // replace value instance with value + // backed by resource in blob store and delete temp file + values[i] = InternalValue.create(data); + val.discard(); + } + } catch (RepositoryException e) { + String msg = "Error while storing blob. id=" + + state.getId() + " idx=" + i + " value=" + val; + log.error(msg, e); + throw new IOException(msg); + } + break; + case PropertyType.DOUBLE: + try { + out.writeDouble(val.getDouble()); + } catch (RepositoryException e) { + // should never occur + throw new IOException("Unexpected error while writing DOUBLE value."); + } + break; + case PropertyType.DECIMAL: + try { + writeDecimal(val.getDecimal()); + } catch (RepositoryException e) { + // should never occur + throw new IOException("Unexpected error while writing DECIMAL value."); + } + break; + case PropertyType.LONG: + try { + writeVarLong(val.getLong()); + } catch (RepositoryException e) { + // should never occur + throw new IOException("Unexpected error while writing LONG value."); + } + break; + case PropertyType.BOOLEAN: + log.info("Writing boolean"); + try { + boolean value = val.getBoolean(); + out.flush(); + out.writeBoolean(value); + log.info("Wrote boolean " + value); + out.flush(); + } catch (RepositoryException e) { + // should never occur + throw new IOException("Unexpected error while writing BOOLEAN value."); + } + break; + case PropertyType.NAME: + try { + writeName(val.getName()); + } catch (RepositoryException e) { + // should never occur + throw new IOException("Unexpected error while writing NAME value."); + } + break; + case PropertyType.WEAKREFERENCE: + case PropertyType.REFERENCE: + writeNodeId(val.getNodeId()); + break; + case PropertyType.DATE: + try { + writeDate(val.getCalendar()); + } catch (RepositoryException e) { + // should never occur + throw new IOException("Unexpected error while writing DATE value."); + } + break; + default: + writeString(val.toString()); + break; + } + } + } + + /** + * Write a small binary value and return the data. + * + * @param value the binary value + * @param state the property state (for error messages) + * @param i the index (for error messages) + * @return the data + * @throws IOException if the data could not be read + */ + private byte[] writeSmallBinary( + InternalValue value, NodePropBundle.PropertyEntry state, int i) + throws IOException { + try { + int size = (int) value.getLength(); + out.writeInt(size); + byte[] data = new byte[size]; + DataInputStream in = + new DataInputStream(value.getStream()); + try { + in.readFully(data); + } finally { + IOUtils.closeQuietly(in); + } + out.write(data, 0, data.length); + return data; + } catch (Exception e) { + String msg = "Error while storing blob. id=" + + state.getId() + " idx=" + i + " value=" + value; + log.error(msg, e); + throw new IOException(msg); + } + } + + /** + * Serializes a node identifier + * + * @param id the node id + * @throws IOException in an I/O error occurs. + */ + private void writeNodeId(NodeId id) throws IOException { + out.writeLong(id.getMostSignificantBits()); + out.writeLong(id.getLeastSignificantBits()); + } + + /** + * Serializes a BigDecimal + * + * @param decimal the decimal number + * @throws IOException in an I/O error occurs. + */ + private void writeDecimal(BigDecimal decimal) throws IOException { + if (decimal == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + // TODO more efficient serialization format + writeString(decimal.toString()); + } + } + + /** + * Serializes a name. The name encoding works as follows: + *

+ * First; if the name is known by the {@link BundleNames} class (this + * includes the null name), then the name is serialized + * as a single byte using the following format. + *

+     * +-------------------------------+
+     * | 0 |    common name index      |
+     * +-------------------------------+
+     * 
+ *

+ * Second; if the name is not known, it gets serialized as a + * variable-length field whose first byte looks like this: + *

+     * +-------------------------------+
+     * | 1 | ns index  |  name length  |
+     * +-------------------------------+
+     * 
+ *

+ * The three-bit namespace index identifies the namespace of the name. + * The serializer keeps track of the default namespace (value 0) and at + * most six other other namespaces (values 1-6), in the order they appear + * in the bundle. When one of these six custom namespaces first appears + * in the bundle, then the namespace URI is written using + * {@link #writeString(String)} right after this byte. + * Later uses of such a namespace simply refers back to the already read + * namespace URI string. Any other namespaces are identified with value 7 + * and always written to the bundle after this byte. + *

+ * The four-bit name length field indicates the length (in UTF-8 bytes) + * of the local part of the name. Since zero-length local names are not + * allowed, the length is first decremented by one before storing in this + * field. The UTF-8 byte sequence is written out after this byte and the + * possible namespace URI string. If the length of the local name is + * larger than 15 (i.e. would be stored as 0x0f or more), then the value + * 0x0f is stored as the name length and the name string is written as + * UTF-8 using {@link #writeBytes(byte[], int)} with a base length of + * 0x10 (0x0f + 1). + * + * @param name the name + * @throws IOException in an I/O error occurs. + */ + private void writeName(Name name) throws IOException { + int index = BundleNames.nameToIndex(name); + if (index != -1) { + assert 0 <= index && index < 0x80; + out.writeByte(index); + } else { + String uri = name.getNamespaceURI(); + int ns = 0; + while (ns < namespaces.length + && namespaces[ns] != null + && !namespaces[ns].equals(uri)) { + ns++; + } + + String local = name.getLocalName(); + byte[] bytes = local.getBytes("UTF-8"); + int len = Math.min(bytes.length - 1, 0x0f); + + out.writeByte(0x80 | ns << 4 | len); + if (ns == namespaces.length || namespaces[ns] == null) { + writeString(uri); + if (ns < namespaces.length) { + namespaces[ns] = uri; + } + } + if (len != 0x0f) { + out.write(bytes); + } else { + writeBytes(bytes, 0x0f + 1); + } + } + } + + /** + * Serializes an integer using a variable-length encoding that favors + * small positive numbers. The serialization consists of one to five + * bytes of the following format: + *

+     * +-------------------------------+
+     * | c | 7 least significant bits  |
+     * +-------------------------------+
+     * 
+ *

+ * If the given integer fits in seven bits (i.e. the value between + * 0 and 127, inclusive), then it is written as-is in a single byte. + * Otherwise the continuation flag c is set and the least + * significant seven bits are written together with the flag as a single + * byte. The integer is then shifed right seven bits and the process + * continues from the beginning. + *

+ * This format uses a single byte for values 0-127, two bytes for + * 128-16343, three for 16343-2097151, four for 2097152-268435455 + * and five bytes for all other 32-bit numbers (including negative ones). + * + * @param integer integer value + * @throws IOException if an I/O error occurs + */ + private void writeVarInt(int value) throws IOException { + while (true) { + int b = value & 0x7f; + if (b != value) { + out.writeByte(b | 0x80); + value >>>= 7; // unsigned shift + } else { + out.writeByte(b); + return; + } + } + } + + private void writeVarInt(int value, int base) throws IOException { + if (value >= base) { + writeVarInt(value - base); + } + } + + /** + * Serializes a long value using a variable length encoding like the + * one used by {@link #writeVarInt(int)} for integer values. Before + * writing out, the value is first normalized to an unsigned value + * by moving the sign bit to be the end negating the other bits of + * a negative value. This normalization step maximizes the number of + * zero high order bits for typical small values (positive or negative), + * and thus keeps the serialization short. + * + * @param value long value + * @throws IOException if an I/O error occurs + */ + private void writeVarLong(long value) throws IOException { + // Normalize to an unsigned value with the sign as the lowest bit + if (value < 0) { + value = ~value << 1 | 1; + } else { + value <<= 1; + } + while (true) { + long b = value & 0x7f; + if (b != value) { + out.writeByte((int) b | 0x80); + value >>>= 7; // unsigned shift + } else { + out.writeByte((int) b); + return; + } + } + } + + /** + * Serializes a JCR date value using the {@link #writeVarLong(long)} + * serialization on a special 64-bit date encoding. This encoding maps + * the sYYYY-MM-DDThh:mm:ss.sssTZD date format used by + * JCR to an as small 64 bit integer (positive or negative) as possible, + * while preserving full accuracy (including time zone offsets) and + * favouring common levels of accuracy (per minute, hour and day) over + * full millisecond level detail. + *

+ * Each date value is mapped to separate timestamp and timezone fields, + * both of whose lenghts are variable: + *

+     * +----- ... ------- ... --+
+     * |  timestamp  | timezone |
+     * +----- ... ------- ... --+
+     * 
+ *

+ * The type and length of the timezone field can be determined by looking + * at the two least significant bits of the value: + *

+ *
?0
+ *
+ * UTC time. The length of the timezone field is just one bit, + * i.e. the second bit is already a part of the timestamp field. + *
+ *
01
+ *
+ * The offset is counted as hours from UTC, and stored as the number + * of hours (positive or negative) in the next 5 bits (range from + * -16 to +15 hours), making the timezone field 7 bits long in total. + *
+ *
11
+ *
+ * The offset is counted as hours and minutes from UTC, and stored + * as the total minute offset (positive or negative) in the next + * 11 bits (range from -17 to +17 hours), making the timezone field + * 13 bits long in total. + *
+ *
+ *

+ * The remaining 51-63 bits of the encoded value make up the timestamp + * field that also uses the two least significant bits to indicate the + * type and length of the field: + *

+ *
00
+ *
+ * sYYYY-MM-DDT00:00:00.000, i.e. midnight of the + * specified date. The next 9 bits encode the day within the year + * (starting from 1, maximum value 366) and the remaining bits are + * used for the year, stored as an offset from year 2010. + *
+ *
01
+ *
+ * sYYYY-MM-DDThh:00:00.000, i.e. at the hour. The + * next 5 bits encode the hour within the day (starting from 0, + * maximum value 23) and the remaining bits are used as described + * above for the date. + *
+ *
10
+ *
+ * sYYYY-MM-DDThh:mm:00.000, i.e. at the minute. The + * next 11 bits encode the minute within the day (starting from 0, + * maximum value 1439) and the remaining bits are used as described + * above for the date. + *
+ *
11
+ *
+ * sYYYY-MM-DDThh:mm:ss.sss, i.e. full millisecond + * accuracy. The next 30 bits encode the millisecond within the + * day (starting from 0, maximum value 87839999) and the remaining + * bits are used as described above for the date. + *
+ *
+ *

+ * With full timezone and millisecond accuracies, this encoding leaves + * 10 bits (64 - 9 - 30 - 2 - 11 - 2) for the date offset, which allows + * for representation of all timestamps between years 1498 and 2521. + * Timestamps outside this range and with a minute-level timezone offset + * are automatically truncated to minute-level accuracy to support the + * full range of years -9999 to 9999 specified in JCR. + *

+ * Note that the year, day of year, and time of day values are stored + * as separate bit sequences to avoid problems with changing leap second + * or leap year definitions. Bit fields are used for better encoding and + * decoding performance than what would be possible with the slightly more + * space efficient mechanism of using multiplication and modulo divisions + * to separate the different timestamp fields. + * + * @param value date value + * @throws IOException if an I/O error occurs + */ + private void writeDate(Calendar value) throws IOException { + int y = value.get(Calendar.YEAR); + if (value.isSet(Calendar.ERA) + && value.get(Calendar.ERA) == GregorianCalendar.BC) { + y = 1 - y; // convert to an astronomical year + } + y -= 2010; // use a recent offset NOTE: do not change this! + + int d = value.get(Calendar.DAY_OF_YEAR); + int h = value.get(Calendar.HOUR_OF_DAY); + int m = value.get(Calendar.MINUTE); + int s = value.get(Calendar.SECOND); + int u = value.get(Calendar.MILLISECOND); + int z = value.getTimeZone().getOffset(value.getTimeInMillis()) / (60 * 1000); + int zh = z / 60; + int zm = z - zh * 60; + + long ts = y << 9 | d & 0x01ff; + + if ((u != 0 || s != 0) && ((-512 <= y && y < 512) || zm == 0)) { + ts <<= 30; + ts |= (((h * 60 + m) * 60 + s) * 1000 + u) & 0x3fffffff; // 30 bits + ts <<= 2; + ts |= 3; + } else if (m != 0) { + ts <<= 11; + ts |= (h * 60 + m) & 0x07ff; // 11 bits + ts <<= 2; + ts |= 2; + } else if (h != 0) { + ts <<= 5; + ts |= h & 0x1f; // 5 bits + ts <<= 2; + ts |= 1; + } else { + ts <<= 2; + } + + if (zm != 0) { + ts <<= 11; + ts |= z & 0x07ff; // 11 bits + writeVarLong(ts << 2 | 3); + } else if (zh != 0) { + ts <<= 5; + ts |= zh & 0x1f; // 5 bits + writeVarLong(ts << 2 | 1); + } else { + writeVarLong(ts << 1); + } + } + + /** + * Serializes a string in UTF-8. The length of the UTF-8 byte sequence + * is first written as a variable-length string (see + * {@link #writeVarInt(int)}), and then the sequence itself is written. + * + * @param value string value + * @throws IOException if an I/O error occurs + */ + private void writeString(String value) throws IOException { + writeBytes(value.getBytes("UTF-8"), 0); + } + + /** + * Serializes the given array of bytes. The length of the byte array is + * first written as a {@link #writeVarInt(int) variable length integer}, + * followed by the given bytes. + * + * @param bytes the bytes to be serialized + * @param base optional base length + * @throws IOException if an I/O error occurs + */ + private void writeBytes(byte[] bytes, int base) throws IOException { + assert bytes.length >= base; + writeVarInt(bytes.length - base); + out.write(bytes); + } + +} Index: src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java =================================================================== --- src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (revision 1372412) +++ src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (working copy) @@ -26,9 +26,12 @@ import org.apache.jackrabbit.spi.commons.name.NameFactoryImpl; import org.apache.jackrabbit.spi.commons.name.NameConstants; +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.SequenceInputStream; import java.util.Calendar; import java.util.Collections; import java.util.GregorianCalendar; @@ -94,27 +97,27 @@ }; private final BundleBinding binding; + + private BufferedInputStream bufferedIn; /** * Counter for the number of bytes read from the input stream. */ - private final CountingInputStream cin; + private CountingInputStream cin; /** * Wrapper for reading structured data from the input stream. */ - private final DataInputStream in; + private DataInputStream in; - private final int version; + private int version; /** * The default namespace and the first six other namespaces used in this * bundle. Used by the {@link #readName()} method to keep track of * already seen namespaces. */ - private final String[] namespaces = - // NOTE: The length of this array must be seven - { Name.NS_DEFAULT_URI, null, null, null, null, null, null }; + private String[] namespaces; /** * Creates a new bundle deserializer. @@ -126,9 +129,20 @@ public BundleReader(BundleBinding binding, InputStream stream) throws IOException { this.binding = binding; - this.cin = new CountingInputStream(stream); + this.bufferedIn = new BufferedInputStream(stream); + bufferedIn.mark(1024 * 1024); + reset(); + } + + void reset() throws IOException { + bufferedIn.reset(); + bufferedIn.mark(1024 * 1024); + this.cin = new CountingInputStream(bufferedIn); this.in = new DataInputStream(cin); this.version = in.readUnsignedByte(); + namespaces = new String[] + // NOTE: The length of this array must be seven + { Name.NS_DEFAULT_URI, null, null, null, null, null, null }; } /** @@ -139,6 +153,43 @@ * @throws IOException if an I/O error occurs. */ public NodePropBundle readBundle(NodeId id) throws IOException { + try { + return readBundleNormal(id); + } catch (IOException e) { + log.warn("Corrupt bundle: exception trying to read bundle " + id + ", trying again", e); + try { + reset(); + return readBundleTryHarder(id); + } catch (RetryException e2) { + int pos = (int) e2.getErrorPosition(); + log.warn("Corrupt bundle: exception trying to read bundle " + id + ", retry at " + pos, e); + bufferedIn.reset(); + log.warn("Corrupt bundle: inserting 1 at position " + pos); + byte[] before = new byte[pos + 2]; + DataInputStream rf = new DataInputStream(bufferedIn); + rf.readFully(before, 0, pos + 1); + before[pos + 1] = before[pos]; + before[pos] = (byte) 1; + SequenceInputStream seq = new SequenceInputStream( + new ByteArrayInputStream(before), rf); + bufferedIn = new BufferedInputStream(seq); + bufferedIn.mark(1024 * 1024); + reset(); + log.warn("Corrupt bundle: re-trying"); + try { + return readBundleTryHarder(id); + } catch (IOException e3) { + log.error("Corrupt bundle: exception trying to read bundle " + id + ", giving up (2)", e3); + throw e3; + } + } catch (IOException e2) { + log.error("Corrupt bundle: exception trying to read bundle " + id + ", giving up (1)", e2); + throw e2; + } + } + } + + public NodePropBundle readBundleNormal(NodeId id) throws IOException { long start = cin.getByteCount(); NodePropBundle bundle = new NodePropBundle(id); if (version >= BundleBinding.VERSION_3) { @@ -149,6 +200,18 @@ bundle.setSize(cin.getByteCount() - start); return bundle; } + + private NodePropBundle readBundleTryHarder(NodeId id) throws IOException { + long start = cin.getByteCount(); + NodePropBundle bundle = new NodePropBundle(id); + if (version >= BundleBinding.VERSION_3) { + readBundleNewTryHarder(bundle); + } else { + readBundleOld(bundle); + } + bundle.setSize(cin.getByteCount() - start); + return bundle; + } private void readBundleNew(NodePropBundle bundle) throws IOException { // node type @@ -210,6 +273,67 @@ bundle.setSharedSet(shared); } } + + private void readBundleNewTryHarder(NodePropBundle bundle) throws IOException { + // node type + bundle.setNodeTypeName(readName()); + + // parentUUID + NodeId parentId = readNodeId(); + if (BundleBinding.NULL_PARENT_ID.equals(parentId)) { + parentId = null; + } + bundle.setParentId(parentId); + + // read modcount + bundle.setModCount((short) readVarInt()); + + int b = in.readUnsignedByte(); + bundle.setReferenceable((b & 1) != 0); + + // mixin types + int mn = readVarInt((b >> 7) & 1, 1); + if (mn == 0) { + bundle.setMixinTypeNames(Collections.emptySet()); + } else if (mn == 1) { + bundle.setMixinTypeNames(Collections.singleton(readName())); + } else { + Set mixins = new HashSet(mn * 2); + for (int i = 0; i < mn; i++) { + mixins.add(readName()); + } + bundle.setMixinTypeNames(mixins); + } + + // properties + int pn = readVarInt((b >> 4) & 7, 7); + for (int i = 0; i < pn; i++) { + PropertyId id = new PropertyId(bundle.getId(), readName()); + bundle.addProperty(readPropertyEntryTryHarder(id)); + } + + // child nodes (list of name/uuid pairs) + int nn = readVarInt((b >> 2) & 3, 3); + for (int i = 0; i < nn; i++) { + Name name = readQName(); + NodeId id = readNodeId(); + bundle.addChildNodeEntry(name, id); + } + + // read shared set + int sn = readVarInt((b >> 1) & 1, 1); + if (sn == 0) { + bundle.setSharedSet(Collections.emptySet()); + } else if (sn == 1) { + bundle.setSharedSet(Collections.singleton(readNodeId())); + } else { + Set shared = new HashSet(); + for (int i = 0; i < sn; i++) { + shared.add(readNodeId()); + } + bundle.setSharedSet(shared); + } + } private void readBundleOld(NodePropBundle bundle) throws IOException { // read primary type...special handling @@ -426,6 +550,150 @@ return entry; } + + /** + * Deserializes a PropertyState from the data input stream. + * + * @param id the property id for the new property entry + * @return the property entry + * @throws IOException if an I/O error occurs. + */ + private NodePropBundle.PropertyEntry readPropertyEntryTryHarder(PropertyId id) + throws IOException { + NodePropBundle.PropertyEntry entry = new NodePropBundle.PropertyEntry(id); + + int count = 1; + if (version >= BundleBinding.VERSION_3) { + int b = in.readUnsignedByte(); + + entry.setType(b & 0x0f); + + int len = b >>> 4; + if (len != 0) { + entry.setMultiValued(true); + if (len == 0x0f) { + count = readVarInt() + 0x0f - 1; + } else { + count = len - 1; + } + } + + entry.setModCount((short) readVarInt()); + } else { + // type and modcount + int type = in.readInt(); + entry.setModCount((short) ((type >> 16) & 0x0ffff)); + type &= 0x0ffff; + entry.setType(type); + + // multiValued + entry.setMultiValued(in.readBoolean()); + + // definitionId + in.readUTF(); + + // count + count = in.readInt(); + } + + // values + InternalValue[] values = new InternalValue[count]; + String[] blobIds = new String[count]; + for (int i = 0; i < count; i++) { + InternalValue val; + switch (entry.getType()) { + case PropertyType.BINARY: + int size = in.readInt(); + if (size == BundleBinding.BINARY_IN_DATA_STORE) { + val = InternalValue.create(binding.dataStore, readString()); + } else if (size == BundleBinding.BINARY_IN_BLOB_STORE) { + blobIds[i] = readString(); + try { + BLOBStore blobStore = binding.getBlobStore(); + if (blobStore instanceof ResourceBasedBLOBStore) { + val = InternalValue.create(((ResourceBasedBLOBStore) blobStore).getResource(blobIds[i])); + } else { + val = InternalValue.create(blobStore.get(blobIds[i])); + } + } catch (IOException e) { + if (binding.errorHandling.ignoreMissingBlobs()) { + log.warn("Ignoring error while reading blob-resource: " + e); + val = InternalValue.create(new byte[0]); + } else { + throw e; + } + } catch (Exception e) { + throw new IOException("Unable to create property value: " + e.toString()); + } + } else { + // short values into memory + byte[] data = new byte[size]; + in.readFully(data); + val = InternalValue.create(data); + } + break; + case PropertyType.DOUBLE: + val = InternalValue.create(in.readDouble()); + break; + case PropertyType.DECIMAL: + val = InternalValue.create(readDecimal()); + break; + case PropertyType.LONG: + if (version >= BundleBinding.VERSION_3) { + val = InternalValue.create(readVarLong()); + } else { + val = InternalValue.create(in.readLong()); + } + break; + case PropertyType.BOOLEAN: + int b = in.read(); + if (b == 0 || b == 1) { + val = InternalValue.create(b == 1); + } else { + throw new RetryException(cin.getByteCount() - 1); + } + break; + case PropertyType.NAME: + val = InternalValue.create(readQName()); + break; + case PropertyType.WEAKREFERENCE: + val = InternalValue.create(readNodeId(), true); + break; + case PropertyType.REFERENCE: + val = InternalValue.create(readNodeId(), false); + break; + case PropertyType.DATE: + if (version >= BundleBinding.VERSION_3) { + val = InternalValue.create(readDate()); + break; + } // else fall through + default: + if (version >= BundleBinding.VERSION_3) { + val = InternalValue.valueOf( + readString(), entry.getType()); + } else { + // because writeUTF(String) has a size limit of 64k, + // Strings are serialized as + int len = in.readInt(); + byte[] bytes = new byte[len]; + in.readFully(bytes); + String stringVal = new String(bytes, "UTF-8"); + + // https://issues.apache.org/jira/browse/JCR-3083 + if (PropertyType.DATE == entry.getType()) { + val = InternalValue.createDate(stringVal); + } else { + val = InternalValue.valueOf(stringVal, entry.getType()); + } + } + } + values[i] = val; + } + entry.setValues(values); + entry.setBlobIds(blobIds); + + return entry; + } /** * Deserializes a node identifier @@ -669,5 +937,19 @@ in.readFully(bytes); return bytes; } + + static class RetryException extends IOException { + + final long errorPosition; + + RetryException(long errorPosition) { + this.errorPosition = errorPosition; + } + + long getErrorPosition() { + return errorPosition; + } + + } }