Skip to content

Configuring ZipNumCluster

Kristinn Sigurðsson edited this page Jul 24, 2014 · 7 revisions

Configuring ZipNumCluster

For information on the ZipNum format, see http://aaron.blog.archive.org/2013/05/28/zipnum-and-cdx-cluster-merging/

Enable and edit CDXCollection.xml as follows:

    <property name="resourceIndex">
      <bean class="org.archive.wayback.resourceindex.LocalResourceIndex">
        <property name="canonicalizer" ref="waybackCanonicalizer" />
        <property name="source">
          <!-- The index source is a ZipNumClusterSearchResultSource wrapping a ZipNumCluster. -->
          <bean class="org.archive.wayback.resourceindex.ZipNumClusterSearchResultSource">
            <property name="cluster">
              <bean class="org.archive.format.gzip.zipnum.ZipNumCluster">
                <!-- Placeholder: replace the value with the path to your ZipNum summary file.
                     (Angle brackets are not allowed unescaped inside an XML attribute value,
                     so the placeholder is written without them.) -->
                <property name="summaryFile" value="/PATH/TO/SUMMARYFILE" />
              </bean>
            </property>
            <property name="params">
              <bean class="org.archive.format.gzip.zipnum.ZipNumParams" />
            </property>
          </bean>
        </property>
        <property name="maxRecords" value="100000" />
        <property name="dedupeRecords" value="true" />
      </bean>
    </property>
Clone this wiki locally