Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (revision 1451267) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (working copy) @@ -802,7 +802,7 @@ te = terms.iterator(te); while (te.next() != null) { String value = te.term().utf8ToString(); - CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER); + CategoryPath cp = new CategoryPath(value, delimiter); final int ordinal = addCategory(cp); docs = te.docs(null, docs, DocsEnum.FLAG_NONE); ordinalMap.addMapping(docs.nextDoc() + base, ordinal); Index: lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java (revision 1451267) +++ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java (working copy) @@ -36,9 +36,10 @@ *
* Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid * unicode character) for this purpose. Recently, we switched to the - * "private-use" character \uF749. + * "private-use" character \uF749. Even more recently, we + * switched to U+001F (INFORMATION SEPARATOR ONE). */ //static final char DEFAULT_DELIMITER = '\uFFFE'; - static final char DEFAULT_DELIMITER = '\uF749'; - + //static final char DEFAULT_DELIMITER = '\uF749'; + static final char DEFAULT_DELIMITER = '\u001F'; } Index: lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java (revision 1451267) +++ lucene/facet/src/java/org/apache/lucene/facet/params/FacetIndexingParams.java (working copy) @@ -58,7 +58,7 @@ * make sure that you return a character that's not found in any path * component. */ - public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749'; + public static final char DEFAULT_FACET_DELIM_CHAR = '\u001F'; private final int partitionSize = Integer.MAX_VALUE; Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1451267) +++ lucene/CHANGES.txt (working copy) @@ -79,6 +79,11 @@ * LUCENE-4748: A FacetRequest on a non-existent field now returns an empty FacetResult instead of skipping it. (Shai Erera, Mike McCandless) +* LUCENE-4806: The default category delimiter character was changed + from U+F749 to U+001F, since the latter uses 1 byte in UTF-8 vs 3 bytes for + the former. Existing facet indices must be reindexed. (Robert + Muir, Shai Erera, Mike McCandless) + Optimizations * LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate