Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt	(revision 1534638)
+++ lucene/CHANGES.txt	(working copy)
@@ -188,6 +188,9 @@
 
 * LUCENE-5266: Improved number of read calls and branches in DirectPackedReader. (Ryan Ernst)
 
+* LUCENE-5300: Optimized SORTED_SET storage for fields that are single-valued:
+  Lucene45DocValuesFormat now encodes them as SORTED. (Adrien Grand)
+
 Documentation
 
 * LUCENE-5211: Better javadocs and error checking of 'format' option in 
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java	(revision 1534638)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesConsumer.java	(working copy)
@@ -21,6 +21,7 @@
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
@@ -42,6 +43,7 @@
 
   static final int BLOCK_SIZE = 16384;
   static final int ADDRESS_INTERVAL = 16;
+  static final Number MISSING_ORD = Long.valueOf(-1); // ord emitted for documents whose ord count is 0 when a SORTED_SET field is written as SORTED
 
   /** Compressed using packed blocks of ints. */
   public static final int DELTA_COMPRESSED = 0;
@@ -340,17 +342,66 @@
     addTermsDict(field, values);
     addNumericField(field, docToOrd, false);
   }
-  
+
+  private static boolean isSingleValued(Iterable<Number> docToOrdCount) { // true iff no ord count exceeds 1
+    for (Number ordCount : docToOrdCount) { // full pass over the counts, stopping early at the first count > 1
+      if (ordCount.longValue() > 1) {
+        return false; // found a multi-valued entry
+      }
+    }
+    return true; // also the result for an empty iterable
+  }
+
   @Override
-  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
+  public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) throws IOException {
+    if (isSingleValued(docToOrdCount)) {
+      // No ord count exceeds 1: delegate to addSortedField, feeding it one ord per entry of docToOrdCount
+      addSortedField(field, values, new Iterable<Number>() {
+
+        @Override
+        public Iterator<Number> iterator() {
+          final Iterator<Number> docToOrdCountIt = docToOrdCount.iterator(); // drives the iteration
+          final Iterator<Number> ordsIt = ords.iterator(); // advanced only for entries whose count is 1
+          return new Iterator<Number>() {
+
+            @Override
+            public boolean hasNext() {
+              assert ordsIt.hasNext() ? docToOrdCountIt.hasNext() : true; // ords may only remain while counts remain
+              return docToOrdCountIt.hasNext();
+            }
+
+            @Override
+            public Number next() {
+              final Number ordCount = docToOrdCountIt.next();
+              if (ordCount.longValue() == 0) {
+                return MISSING_ORD; // no ord for this entry: emit the sentinel and leave ordsIt untouched
+              } else {
+                assert ordCount.longValue() == 1; // guaranteed by the isSingleValued check above
+                return ordsIt.next(); // the entry's only ord
+              }
+            }
+
+            @Override
+            public void remove() {
+              throw new UnsupportedOperationException(); // read-only view
+            }
+
+          };
+        }
+
+      });
+      return; // skip the multi-valued SORTED_SET encoding below
+    }
+
     meta.writeVInt(field.number);
     meta.writeByte(Lucene45DocValuesFormat.SORTED_SET);
     // write the ord -> byte[] as a binary field
     addTermsDict(field, values);
+
     // write the stream of ords as a numeric field
     // NOTE: we could return an iterator that delta-encodes these within a doc
     addNumericField(field, ords, false);
-    
+
     // write the doc -> ord count as a absolute index to the stream
     meta.writeVInt(field.number);
     meta.writeByte(Lucene45DocValuesFormat.NUMERIC);
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java	(revision 1534638)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesFormat.java	(working copy)
@@ -178,7 +178,8 @@
   static final String META_CODEC = "Lucene45ValuesMetadata";
   static final String META_EXTENSION = "dvm";
   static final int VERSION_START = 0;
-  static final int VERSION_CURRENT = VERSION_START;
+  static final int VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED = 1; // from this version on, single-valued SORTED_SET fields are written as SORTED
+  static final int VERSION_CURRENT = VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED;
   static final byte NUMERIC = 0;
   static final byte BINARY = 1;
   static final byte SORTED = 2;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java	(revision 1534638)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java	(working copy)
@@ -40,6 +40,7 @@
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SingletonSortedSetDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
@@ -469,6 +470,12 @@
 
   @Override
   public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
+    if (!ordIndexes.containsKey(field.number)) {
+      // No ord index for this field: it was indexed single-valued (written as SORTED), so expose the SORTED values as a set
+      final SortedDocValues values = getSorted(field);
+      return new SingletonSortedSetDocValues(values);
+    }
+
     final IndexInput data = this.data.clone();
     final long valueCount = binaries.get(field.number).count;
     // we keep the byte[]s and list of ords on disk, these could be large
Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
===================================================================
--- lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java	(revision 1534638)
+++ lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java	(working copy)
@@ -2028,8 +2028,8 @@
     ireader.close();
     directory.close();
   }
-  
-  private void doTestSortedSetVsStoredFields(int minLength, int maxLength) throws Exception {
+
+  private void doTestSortedSetVsStoredFields(int minLength, int maxLength, int maxValuesPerDoc) throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
@@ -2046,7 +2046,7 @@
       } else {
         length = _TestUtil.nextInt(random(), minLength, maxLength);
       }
-      int numValues = random().nextInt(17);
+      int numValues = _TestUtil.nextInt(random(), 0, maxValuesPerDoc);
       // create a random set of strings
       Set<String> values = new TreeSet<String>();
       for (int v = 0; v < numValues; v++) {
@@ -2109,7 +2109,7 @@
     int numIterations = atLeast(1);
     for (int i = 0; i < numIterations; i++) {
       int fixedLength = _TestUtil.nextInt(random(), 1, 10);
-      doTestSortedSetVsStoredFields(fixedLength, fixedLength);
+      doTestSortedSetVsStoredFields(fixedLength, fixedLength, 16);
     }
   }
   
@@ -2117,10 +2117,27 @@
     assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
     int numIterations = atLeast(1);
     for (int i = 0; i < numIterations; i++) {
-      doTestSortedSetVsStoredFields(1, 10);
+      doTestSortedSetVsStoredFields(1, 10, 16);
     }
   }
 
+  public void testSortedSetFixedLengthSingleValuedVsStoredFields() throws Exception {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    int numIterations = atLeast(1);
+    for (int i = 0; i < numIterations; i++) {
+      int fixedLength = _TestUtil.nextInt(random(), 1, 10);
+      doTestSortedSetVsStoredFields(fixedLength, fixedLength, 1); // minLength == maxLength, maxValuesPerDoc = 1
+    }
+  }
+  
+  public void testSortedSetVariableLengthSingleValuedVsStoredFields() throws Exception {
+    assumeTrue("Codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    int numIterations = atLeast(1);
+    for (int i = 0; i < numIterations; i++) {
+      doTestSortedSetVsStoredFields(1, 10, 1); // minLength 1, maxLength 10, maxValuesPerDoc = 1
+    }
+  }
+
   private void assertEquals(Bits expected, Bits actual) throws Exception {
     assertEquals(expected.length(), actual.length());
     for (int i = 0; i < expected.length(); i++) {
