<?xml version="1.0" encoding="UTF-8" ?>

<schema name="nutch" version="1.1">

  <types>

    <!-- Plain string type backed by solr.StrField. No analyzer is configured
         for it; norms are omitted and sortMissingLast is switched on. -->
    <fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>

    <!-- Numeric types, both declared with norms omitted. -->
    <fieldType name="long"  class="solr.LongField"  omitNorms="true"/>
    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>

    <!-- General text type. The single analyzer below carries no "type"
         attribute, so it is the only analysis chain declared for this type.
         Filters are listed in the order they are configured to run:
         stop words, word-delimiter splitting, lowercasing, English Porter
         stemming, duplicate-token removal. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <!-- Stop-word list comes from stopwords.txt, matched ignoring case. -->
        <filter class="solr.StopFilterFactory"
                words="stopwords.txt"
                ignoreCase="true"/>
        <!-- Word and number parts are generated and catenated; catenateAll
             is off; case changes are treated as split points. -->
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- protwords.txt is passed as the stemmer's "protected" list. -->
        <filter class="solr.EnglishPorterFilterFactory"
                protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- URL type: standard tokenizer, lowercasing first, then word-delimiter
         splitting with only word-part and number-part generation set. -->
    <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

  </types>

<!-- Document fields. Attributes are written in a fixed order:
     name, type, indexed, stored, then any optional flag.
       indexed and stored : id, url, title
       indexed only       : content, host, anchor
       stored only        : segment, digest, site, tstamp, boost, cache -->
<fields>
  <!-- Named by <uniqueKey> further down in this schema. -->
  <field name="id"      type="string" indexed="true"  stored="true"/>
  <!-- The only field marked required. -->
  <field name="url"     type="url"    indexed="true"  stored="true"  required="true"/>
  <field name="content" type="text"   indexed="true"  stored="false"/>
  <field name="segment" type="string" indexed="false" stored="true"/>
  <field name="digest"  type="string" indexed="false" stored="true"/>
  <field name="host"    type="url"    indexed="true"  stored="false"/>
  <field name="site"    type="string" indexed="false" stored="true"/>
  <!-- The only multi-valued field. -->
  <field name="anchor"  type="text"   indexed="true"  stored="false" multiValued="true"/>
  <field name="title"   type="text"   indexed="true"  stored="true"/>
  <field name="tstamp"  type="long"   indexed="false" stored="true"/>
  <field name="boost"   type="float"  indexed="false" stored="true"/>
  <field name="cache"   type="string" indexed="false" stored="true"/>
</fields>

 <!-- "id" is the unique key of a document. It is filled by copying the
      value of "url" into it, so the URL doubles as the document key. -->
 <uniqueKey>id</uniqueKey>
 <copyField dest="id" source="url"/>

 <!-- Query defaults: "content" is the default search field and the query
      parser's default operator is OR. -->
 <defaultSearchField>content</defaultSearchField>
 <solrQueryParser defaultOperator="OR"/>

</schema>
