org.apache.lucene.queries_2.9.1.v20100421-0704

16:41:13.505 INFO  jd.cli.Main - Decompiling org.apache.lucene.queries_2.9.1.v20100421-0704.jar
package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.SortedVIntList;

/**
 * A {@link Filter} that combines the document sets of other filters with
 * boolean SHOULD / MUST_NOT / MUST semantics.
 *
 * <p>Clauses are registered through {@link #add(FilterClause)}. When the
 * document set is computed, SHOULD filters are OR-ed together first, MUST_NOT
 * filters are removed next, and MUST filters are AND-ed in last.</p>
 */
public class BooleanFilter
  extends Filter
{
  // Each list is created lazily by add(); null means "no clause of that kind".
  ArrayList shouldFilters = null;
  ArrayList notFilters = null;
  ArrayList mustFilters = null;

  /**
   * Returns an iterator over the documents accepted by filters[index].
   */
  private DocIdSetIterator getDISI(ArrayList filters, int index, IndexReader reader)
    throws IOException
  {
    return ((Filter)filters.get(index)).getDocIdSet(reader).iterator();
  }

  /**
   * Computes the documents accepted by the combination of all registered
   * clauses.
   *
   * @param reader the index to evaluate the clauses against
   * @return the combined document set, or {@link DocIdSet#EMPTY_DOCIDSET}
   *         when no clause has been added
   * @throws IOException if one of the nested filters fails to read the index
   */
  public DocIdSet getDocIdSet(IndexReader reader)
    throws IOException
  {
    OpenBitSetDISI res = null;

    if (shouldFilters != null) {
      for (int i = 0; i < shouldFilters.size(); i++) {
        if (res == null) {
          res = new OpenBitSetDISI(getDISI(shouldFilters, i, reader), reader.maxDoc());
        } else {
          DocIdSet dis = ((Filter)shouldFilters.get(i)).getDocIdSet(reader);
          if (dis instanceof OpenBitSet) {
            res.or((OpenBitSet)dis);
          } else {
            // Reuse the set computed above instead of asking the filter again.
            res.inPlaceOr(dis.iterator());
          }
        }
      }
    }

    if (notFilters != null) {
      for (int i = 0; i < notFilters.size(); i++) {
        if (res == null) {
          // No SHOULD clause: start from the complement of the first NOT filter.
          res = new OpenBitSetDISI(getDISI(notFilters, i, reader), reader.maxDoc());
          res.flip(0L, reader.maxDoc());
        } else {
          DocIdSet dis = ((Filter)notFilters.get(i)).getDocIdSet(reader);
          if (dis instanceof OpenBitSet) {
            res.andNot((OpenBitSet)dis);
          } else {
            res.inPlaceNot(dis.iterator());
          }
        }
      }
    }

    if (mustFilters != null) {
      for (int i = 0; i < mustFilters.size(); i++) {
        if (res == null) {
          res = new OpenBitSetDISI(getDISI(mustFilters, i, reader), reader.maxDoc());
        } else {
          DocIdSet dis = ((Filter)mustFilters.get(i)).getDocIdSet(reader);
          if (dis instanceof OpenBitSet) {
            res.and((OpenBitSet)dis);
          } else {
            res.inPlaceAnd(dis.iterator());
          }
        }
      }
    }

    if (res != null) {
      return finalResult(res, reader.maxDoc());
    }
    return DocIdSet.EMPTY_DOCIDSET;
  }

  /**
   * Chooses the representation of the computed result: a
   * {@link SortedVIntList} when fewer than maxDocs / 9 bits are set,
   * otherwise the bit set itself.
   *
   * @param result  the combined bit set
   * @param maxDocs the reader's maxDoc() value
   * @return the document set handed back to the caller of getDocIdSet
   * @deprecated the original carries a bare deprecation tag; the intended
   *             replacement is not visible in this file
   */
  protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs)
  {
    return result.cardinality() < maxDocs / 9 ? new SortedVIntList(result) : result;
  }

  /**
   * Registers a clause; its filter is stored in the list matching the
   * clause's occur value (MUST, SHOULD or MUST_NOT).
   *
   * @param filterClause the filter together with its occur value
   */
  public void add(FilterClause filterClause)
  {
    // The three occur values are mutually exclusive, so stop at the first match.
    if (filterClause.getOccur().equals(BooleanClause.Occur.MUST)) {
      if (mustFilters == null) {
        mustFilters = new ArrayList();
      }
      mustFilters.add(filterClause.getFilter());
    } else if (filterClause.getOccur().equals(BooleanClause.Occur.SHOULD)) {
      if (shouldFilters == null) {
        shouldFilters = new ArrayList();
      }
      shouldFilters.add(filterClause.getFilter());
    } else if (filterClause.getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
      if (notFilters == null) {
        notFilters = new ArrayList();
      }
      notFilters.add(filterClause.getFilter());
    }
  }

  /** Null-safe list comparison: two null lists are considered equal. */
  private boolean equalFilters(ArrayList filters1, ArrayList filters2)
  {
    return (filters1 == filters2) || ((filters1 != null) && (filters1.equals(filters2)));
  }

  /**
   * Two BooleanFilters are equal when they are of the same class and hold
   * equal MUST, SHOULD and MUST_NOT filter lists.
   */
  public boolean equals(Object obj)
  {
    if (this == obj) {
      return true;
    }
    if ((obj == null) || (obj.getClass() != getClass())) {
      return false;
    }
    BooleanFilter other = (BooleanFilter)obj;
    // Compare against the OTHER instance's lists (the previous code compared
    // every list with itself, which made any two instances equal).
    return equalFilters(notFilters, other.notFilters)
        && equalFilters(mustFilters, other.mustFilters)
        && equalFilters(shouldFilters, other.shouldFilters);
  }

  /** Hash over the three clause lists, consistent with {@link #equals(Object)}. */
  public int hashCode()
  {
    int hash = 7;
    hash = 31 * hash + (null == mustFilters ? 0 : mustFilters.hashCode());
    hash = 31 * hash + (null == notFilters ? 0 : notFilters.hashCode());
    hash = 31 * hash + (null == shouldFilters ? 0 : shouldFilters.hashCode());
    return hash;
  }

  /**
   * Renders the filter as "BooleanFilter( ... )": SHOULD filters without a
   * prefix, MUST filters prefixed with '+', MUST_NOT filters with '-'.
   */
  public String toString()
  {
    StringBuffer buffer = new StringBuffer();
    buffer.append("BooleanFilter(");
    appendFilters(shouldFilters, "", buffer);
    appendFilters(mustFilters, "+", buffer);
    appendFilters(notFilters, "-", buffer);
    buffer.append(")");
    return buffer.toString();
  }

  /** Appends " " + occurString + filter for every filter in the (possibly null) list. */
  private void appendFilters(ArrayList filters, String occurString, StringBuffer buffer)
  {
    if (filters != null) {
      for (int i = 0; i < filters.size(); i++) {
        buffer.append(' ');
        buffer.append(occurString);
        buffer.append(filters.get(i).toString());
      }
    }
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.BooleanFilter
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

// Decompiler (JD-Core) rendering of the innermost anonymous class; the same
// coord() logic also appears inline in BoostingQuery.rewrite().
// NOTE(review): "BoostingQuery.1" and the access$NNN calls look like decompiler
// renderings of compiler-generated names - this listing is presumably not
// compilable as written.
class BoostingQuery$1$1
  extends DefaultSimilarity
{
  // Reference to the enclosing BoostingQuery.1 instance.
  // NOTE(review): the constructor below does not assign it in this listing.
  private final BoostingQuery.1 this$1;
  
  BoostingQuery$1$1(BoostingQuery.1 param1) {}
  
  // Returns 1.0 when overlap is 1, the value fetched through the synthetic
  // accessors when overlap is 2 (presumably the outer query's boost - compare
  // the inline copy in BoostingQuery.rewrite), and 0.0 otherwise.
  // The max argument is not used.
  public float coord(int overlap, int max)
  {
    switch (overlap)
    {
    case 1: 
      return 1.0F;
    case 2: 
      return BoostingQuery.access$100(BoostingQuery.1.access$000(this$1));
    }
    return 0.0F;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.BoostingQuery.1.1
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

/**
 * Decompiled stand-alone rendering of the anonymous {@link BooleanQuery}
 * subclass that BoostingQuery.rewrite() creates. It overrides
 * getSimilarity so that coord() depends only on the overlap count.
 *
 * NOTE(review): access$100 looks like a compiler-generated accessor
 * (presumably for the outer query's private boost field); it is kept exactly
 * as the decompiler printed it.
 */
class BoostingQuery$1
  extends BooleanQuery
{
  private final BoostingQuery this$0;

  BoostingQuery$1(BoostingQuery paramBoostingQuery)
  {
    // The final outer-instance reference was never assigned in the listing.
    this$0 = paramBoostingQuery;
  }

  /**
   * Returns a similarity whose coord() is 1.0 for an overlap of 1, the value
   * obtained through access$100 for an overlap of 2, and 0.0 otherwise.
   *
   * @param searcher not used
   */
  public Similarity getSimilarity(Searcher searcher)
  {
    // The listing created this object but dropped the "return".
    return new DefaultSimilarity()
    {
      public float coord(int overlap, int max)
      {
        switch (overlap)
        {
        case 1:
          return 1.0F;
        case 2:
          return BoostingQuery.access$100(this$0);
        }
        return 0.0F;
      }
    };
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.BoostingQuery.1
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import java.io.IOException;
import org.apache.lucene.index.IndexReader;

/**
 * A query built from a "match" query and a "context" query.
 *
 * <p>{@link #rewrite(IndexReader)} turns it into a {@link BooleanQuery} with
 * the match query as a MUST clause and the context query (whose own boost is
 * forced to 0) as a SHOULD clause. The coord factor of that boolean query is
 * 1.0 when one clause matched and {@code boost} when both matched.</p>
 */
public class BoostingQuery
  extends Query
{
  private float boost;
  private Query match;
  private Query context;

  /**
   * @param match   the query that selects documents
   * @param context the query whose additional match triggers the boost; it is
   *                cloned, and the clone's boost is set to 0
   * @param boost   the coord factor applied when both queries match
   */
  public BoostingQuery(Query match, Query context, float boost)
  {
    this.match = match;
    this.context = ((Query)context.clone());
    this.boost = boost;

    this.context.setBoost(0.0F);
  }

  /**
   * Builds the equivalent boolean query.
   *
   * @param reader not used by this implementation
   * @return a BooleanQuery with match as MUST and context as SHOULD
   * @throws IOException declared by the overridden method
   */
  public Query rewrite(IndexReader reader)
    throws IOException
  {
    BooleanQuery result = new BooleanQuery()
    {
      public Similarity getSimilarity(Searcher searcher)
      {
        // The listing created this object but dropped the "return".
        return new DefaultSimilarity()
        {
          public float coord(int overlap, int max)
          {
            switch (overlap)
            {
            case 1:   // only one clause matched
              return 1.0F;
            case 2:   // both clauses matched
              return boost;
            }
            return 0.0F;
          }
        };
      }
    };
    result.add(match, BooleanClause.Occur.MUST);
    result.add(context, BooleanClause.Occur.SHOULD);

    return result;
  }

  /** Hash over boost, context and match, consistent with {@link #equals(Object)}. */
  public int hashCode()
  {
    final int prime = 31;
    int result = 1;
    result = prime * result + Float.floatToIntBits(boost);
    result = prime * result + (context == null ? 0 : context.hashCode());
    result = prime * result + (match == null ? 0 : match.hashCode());
    return result;
  }

  /**
   * Two BoostingQuery instances are equal when they are of the same class and
   * have the same boost bits, equal context queries and equal match queries.
   */
  public boolean equals(Object obj)
  {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (getClass() != obj.getClass()) {
      return false;
    }
    // Every comparison is against the OTHER instance (the previous code
    // compared each field with itself).
    BoostingQuery other = (BoostingQuery)obj;
    if (Float.floatToIntBits(boost) != Float.floatToIntBits(other.boost)) {
      return false;
    }
    if (context == null ? other.context != null : !context.equals(other.context)) {
      return false;
    }
    if (match == null ? other.match != null : !match.equals(other.match)) {
      return false;
    }
    return true;
  }

  /** Returns the match and context queries rendered for the field, joined by "/". */
  public String toString(String field)
  {
    return match.toString(field) + "/" + context.toString(field);
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.BoostingQuery
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.io.PrintStream;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;

/**
 * A {@link Filter} that keeps a single document per distinct term of one
 * field, either the first or the last document listed for that term.
 *
 * <p>Two strategies are available: {@link #PM_FULL_VALIDATION} starts from an
 * empty bit set and sets the kept document of every term;
 * {@link #PM_FAST_INVALIDATION} starts from a full bit set and clears the
 * non-kept documents of terms whose document frequency exceeds one.</p>
 */
public class DuplicateFilter
  extends Filter
{
  /** Keep the first document listed for each term. */
  public static final int KM_USE_FIRST_OCCURRENCE = 1;
  /** Keep the last document listed for each term. */
  public static final int KM_USE_LAST_OCCURRENCE = 2;
  /** Build the result by setting the bit of each kept document. */
  public static final int PM_FULL_VALIDATION = 1;
  /** Build the result by clearing the bits of duplicate documents. */
  public static final int PM_FAST_INVALIDATION = 2;

  String fieldName;
  int keepMode = KM_USE_FIRST_OCCURRENCE;
  int processingMode = PM_FULL_VALIDATION;

  /**
   * Creates a filter on the given field that keeps the last occurrence and
   * uses full validation.
   */
  public DuplicateFilter(String fieldName)
  {
    this(fieldName, KM_USE_LAST_OCCURRENCE, PM_FULL_VALIDATION);
  }

  /**
   * @param fieldName      the field whose terms identify duplicates
   * @param keepMode       one of the KM_* constants
   * @param processingMode one of the PM_* constants
   */
  public DuplicateFilter(String fieldName, int keepMode, int processingMode)
  {
    this.fieldName = fieldName;
    this.keepMode = keepMode;
    this.processingMode = processingMode;
  }

  /**
   * Computes the de-duplicated document set using the strategy selected by
   * the processing mode (fast invalidation, otherwise full validation).
   *
   * @throws IOException if the index cannot be read
   */
  public DocIdSet getDocIdSet(IndexReader reader)
    throws IOException
  {
    if (processingMode == PM_FAST_INVALIDATION) {
      return fastBits(reader);
    }
    return correctBits(reader);
  }

  /**
   * Full validation: walks every term of the field and sets the bit of the
   * first (or last, depending on keepMode) document listed for it.
   */
  private OpenBitSet correctBits(IndexReader reader)
    throws IOException
  {
    OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.terms(startTerm);
    if (te != null)
    {
      try
      {
        Term currTerm = te.term();
        // Reference comparison kept from the original; presumably relies on
        // Term field names being interned - verify against Term.
        while ((currTerm != null) && (currTerm.field() == startTerm.field()))
        {
          TermDocs td = reader.termDocs(currTerm);
          try
          {
            if (td.next()) {
              if (keepMode == KM_USE_FIRST_OCCURRENCE)
              {
                bits.set(td.doc());
              }
              else
              {
                int lastDoc;
                do
                {
                  lastDoc = td.doc();
                } while (td.next());
                bits.set(lastDoc);
              }
            }
          }
          finally
          {
            td.close();
          }
          if (!te.next()) {
            break;
          }
          currTerm = te.term();
        }
      }
      finally
      {
        te.close();
      }
    }
    return bits;
  }

  /**
   * Fast invalidation: starts with every bit set and, for each term of the
   * field with a document frequency above one, clears the bits of all listed
   * documents except the kept one.
   */
  private OpenBitSet fastBits(IndexReader reader)
    throws IOException
  {
    OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    bits.set(0L, reader.maxDoc());
    Term startTerm = new Term(fieldName);
    TermEnum te = reader.terms(startTerm);
    if (te != null)
    {
      try
      {
        Term currTerm = te.term();
        while ((currTerm != null) && (currTerm.field() == startTerm.field()))
        {
          if (te.docFreq() > 1)
          {
            int lastDoc = -1;
            TermDocs td = reader.termDocs(currTerm);
            try
            {
              td.next();
              if (keepMode == KM_USE_FIRST_OCCURRENCE) {
                // Skip the first document so that it stays set.
                td.next();
              }
              do
              {
                lastDoc = td.doc();
                bits.clear(lastDoc);
              } while (td.next());
              if (keepMode == KM_USE_LAST_OCCURRENCE) {
                // The loop cleared the last document too; restore it.
                bits.set(lastDoc);
              }
            }
            finally
            {
              td.close();
            }
          }
          if (!te.next()) {
            break;
          }
          currTerm = te.term();
        }
      }
      finally
      {
        te.close();
      }
    }
    return bits;
  }

  /**
   * Ad-hoc timing harness against a hard-coded index path.
   * NOTE(review): bits(IndexReader) is not declared in this class; presumably
   * it is inherited from Filter - confirm before relying on this method.
   */
  public static void main(String[] args)
    throws Exception
  {
    IndexReader r = IndexReader.open("/indexes/personCentricAnon");

    long start = System.currentTimeMillis();

    DuplicateFilter df = new DuplicateFilter("vehicle.vrm", KM_USE_LAST_OCCURRENCE, PM_FAST_INVALIDATION);

    BitSet b = df.bits(r);
    long end = System.currentTimeMillis() - start;
    System.out.println(b.cardinality() + " in " + end + " ms ");
  }

  public String getFieldName()
  {
    return fieldName;
  }

  public void setFieldName(String fieldName)
  {
    this.fieldName = fieldName;
  }

  public int getKeepMode()
  {
    return keepMode;
  }

  public void setKeepMode(int keepMode)
  {
    this.keepMode = keepMode;
  }

  /**
   * Two filters are equal when they are of the same class and share keep
   * mode, processing mode and field name.
   */
  public boolean equals(Object obj)
  {
    if (this == obj) {
      return true;
    }
    if ((obj == null) || (obj.getClass() != getClass())) {
      return false;
    }
    // Compare against the OTHER instance (the previous code compared every
    // field with itself).
    DuplicateFilter other = (DuplicateFilter)obj;
    return (keepMode == other.keepMode)
        && (processingMode == other.processingMode)
        && ((fieldName == other.fieldName) || ((fieldName != null) && (fieldName.equals(other.fieldName))));
  }

  /** Hash over keep mode, processing mode and field name; a null field name hashes as 0. */
  public int hashCode()
  {
    int hash = 217;
    hash = 31 * hash + keepMode;
    hash = 31 * hash + processingMode;
    // equals() tolerates a null field name, so hashCode() must not throw on it.
    hash = 31 * hash + (fieldName == null ? 0 : fieldName.hashCode());
    return hash;
  }

  public int getProcessingMode()
  {
    return processingMode;
  }

  public void setProcessingMode(int processingMode)
  {
    this.processingMode = processingMode;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.DuplicateFilter
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import java.io.Serializable;

/**
 * Pairs a {@link Filter} with the occur value that says how the filter takes
 * part in a boolean combination.
 */
public class FilterClause
  implements Serializable
{
  // Reference fields default to null until the constructor assigns them.
  BooleanClause.Occur occur;
  Filter filter;

  /**
   * @param filter the filter of this clause
   * @param occur  how the filter participates in the combination
   */
  public FilterClause(Filter filter, BooleanClause.Occur occur) {
    this.filter = filter;
    this.occur = occur;
  }

  /** @return the filter given at construction */
  public Filter getFilter() {
    return this.filter;
  }

  /** @return the occur value given at construction */
  public BooleanClause.Occur getOccur() {
    return this.occur;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FilterClause
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

/**
 * Decompiled stand-alone rendering of the inner class
 * FuzzyLikeThisQuery.FieldVals: the query text, target field, minimum
 * similarity and prefix length registered for one field.
 */
class FuzzyLikeThisQuery$FieldVals
{
  String queryString;
  String fieldName;
  float minSimilarity;
  int prefixLength;
  private final FuzzyLikeThisQuery this$0;

  public FuzzyLikeThisQuery$FieldVals(FuzzyLikeThisQuery paramFuzzyLikeThisQuery, String name, float similarity, int length, String queryString)
  {
    // The final outer-instance reference was never assigned in the listing.
    this$0 = paramFuzzyLikeThisQuery;
    fieldName = name;
    minSimilarity = similarity;
    prefixLength = length;
    this.queryString = queryString;
  }

  /** Hash over all four values, consistent with {@link #equals(Object)}. */
  public int hashCode()
  {
    final int prime = 31;
    int result = 1;
    result = prime * result + (fieldName == null ? 0 : fieldName.hashCode());
    result = prime * result + Float.floatToIntBits(minSimilarity);
    result = prime * result + prefixLength;
    result = prime * result + (queryString == null ? 0 : queryString.hashCode());
    return result;
  }

  /**
   * Two instances are equal when they are of the same class and agree on
   * field name, minimum similarity bits, prefix length and query string.
   */
  public boolean equals(Object obj)
  {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (getClass() != obj.getClass()) {
      return false;
    }
    // Compare against the OTHER instance (the previous code compared every
    // field with itself).
    FuzzyLikeThisQuery$FieldVals other = (FuzzyLikeThisQuery$FieldVals)obj;
    if (fieldName == null ? other.fieldName != null : !fieldName.equals(other.fieldName)) {
      return false;
    }
    if (Float.floatToIntBits(minSimilarity) != Float.floatToIntBits(other.minSimilarity)) {
      return false;
    }
    if (prefixLength != other.prefixLength) {
      return false;
    }
    if (queryString == null ? other.queryString != null : !queryString.equals(other.queryString)) {
      return false;
    }
    return true;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FuzzyLikeThisQuery.FieldVals
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

/**
 * Decompiled stand-alone rendering of the anonymous SimilarityDelegator that
 * FuzzyTermQuery.getSimilarity() creates: idf is always 1.0 and tf is 1.0
 * whenever the owning query ignores term frequency.
 */
class FuzzyLikeThisQuery$FuzzyTermQuery$1
  extends SimilarityDelegator
{
  private final FuzzyLikeThisQuery.FuzzyTermQuery this$0;

  FuzzyLikeThisQuery$FuzzyTermQuery$1(FuzzyLikeThisQuery.FuzzyTermQuery paramFuzzyTermQuery, Similarity x0)
  {
    super(x0);
    // tf() dereferences this$0, which the listing never assigned.
    this$0 = paramFuzzyTermQuery;
  }

  /** Returns 1.0 when the owning query ignores term frequency, else the delegate's tf. */
  public float tf(float freq)
  {
    if (this$0.ignoreTF) {
      return 1.0F;
    }
    return super.tf(freq);
  }

  /** Always 1.0; both arguments are ignored. */
  public float idf(int docFreq, int numDocs)
  {
    return 1.0F;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FuzzyLikeThisQuery.FuzzyTermQuery.1
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import org.apache.lucene.index.Term;

/**
 * Decompiled stand-alone rendering of FuzzyLikeThisQuery.FuzzyTermQuery: a
 * TermQuery whose similarity always reports an idf of 1.0 and, when ignoreTF
 * is set, a tf of 1.0 as well.
 */
class FuzzyLikeThisQuery$FuzzyTermQuery
  extends TermQuery
{
  boolean ignoreTF;

  /**
   * @param t        the term to match
   * @param ignoreTF whether tf() should report 1.0 instead of delegating
   */
  public FuzzyLikeThisQuery$FuzzyTermQuery(Term t, boolean ignoreTF) {
    super(t);
    this.ignoreTF = ignoreTF;
  }

  /**
   * Wraps the similarity supplied by the superclass in a delegator that
   * overrides tf and idf as described on the class.
   */
  public Similarity getSimilarity(Searcher searcher) {
    Similarity wrapped = super.getSimilarity(searcher);
    return new SimilarityDelegator(wrapped) {
      public float tf(float freq) {
        // ignoreTF is read on every call, not captured at creation time.
        return ignoreTF ? 1.0F : super.tf(freq);
      }

      public float idf(int docFreq, int numDocs) {
        return 1.0F;
      }
    };
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FuzzyLikeThisQuery.FuzzyTermQuery
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import org.apache.lucene.index.Term;

/**
 * Decompiled stand-alone rendering of FuzzyLikeThisQuery.ScoreTerm: a plain
 * holder for a term, its score and the source term it was derived from.
 */
class FuzzyLikeThisQuery$ScoreTerm
{
  public Term term;
  public float score;
  Term fuzziedSourceTerm;

  /**
   * @param term              the variant term
   * @param score             the score assigned to the variant
   * @param fuzziedSourceTerm the term the variant was expanded from
   */
  public FuzzyLikeThisQuery$ScoreTerm(Term term, float score, Term fuzziedSourceTerm) {
    this.fuzziedSourceTerm = fuzziedSourceTerm;
    this.score = score;
    this.term = term;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FuzzyLikeThisQuery.ScoreTerm
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;

/**
 * Decompiled stand-alone rendering of FuzzyLikeThisQuery.ScoreTermQueue: a
 * priority queue of ScoreTerm objects ordered by score, with ties broken by
 * term order.
 */
class FuzzyLikeThisQuery$ScoreTermQueue
  extends PriorityQueue
{
  public FuzzyLikeThisQuery$ScoreTermQueue(int size)
  {
    initialize(size);
  }

  /**
   * Orders by ascending score; for equal scores the element whose term
   * compares greater is considered the lesser one.
   */
  protected boolean lessThan(Object a, Object b)
  {
    FuzzyLikeThisQuery.ScoreTerm termA = (FuzzyLikeThisQuery.ScoreTerm)a;
    FuzzyLikeThisQuery.ScoreTerm termB = (FuzzyLikeThisQuery.ScoreTerm)b;
    // The listing had lost the termA./termB. qualifiers ("score == score").
    if (termA.score == termB.score) {
      return termA.term.compareTo(termB.term) > 0;
    }
    return termA.score < termB.score;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FuzzyLikeThisQuery.ScoreTermQueue
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.PriorityQueue;

/**
 * A query that expands free text into fuzzy variants of its terms.
 *
 * <p>Text is registered per field with
 * {@link #addTerms(String, String, float, int)}. On
 * {@link #rewrite(IndexReader)} each registered text is analyzed, every
 * distinct token is expanded through a FuzzyTermEnum, the variants are ranked
 * in a bounded queue, and the survivors are turned into a BooleanQuery of
 * SHOULD clauses (variants of one source term are grouped in a nested
 * BooleanQuery).</p>
 */
public class FuzzyLikeThisQuery
  extends Query
{
  static Similarity sim = new DefaultSimilarity();
  // Cached result of rewrite(); once set, rewrite() returns it unchanged.
  Query rewrittenQuery = null;
  // FieldVals entries awaiting expansion; emptied by rewrite().
  ArrayList fieldVals = new ArrayList();
  Analyzer analyzer;
  // Queue of the best variants over all fields; its capacity is maxNumTerms.
  ScoreTermQueue q;
  int MAX_VARIANTS_PER_TERM = 50;
  boolean ignoreTF = false;
  private int maxNumTerms;

  /** Hash over analyzer, fieldVals, ignoreTF and maxNumTerms. */
  public int hashCode()
  {
    final int prime = 31;
    int result = 1;
    result = prime * result + (analyzer == null ? 0 : analyzer.hashCode());
    result = prime * result + (fieldVals == null ? 0 : fieldVals.hashCode());
    result = prime * result + (ignoreTF ? 1231 : 1237);
    result = prime * result + maxNumTerms;
    return result;
  }

  /**
   * Two queries are equal when they are of the same class and agree on
   * analyzer, registered field values, ignoreTF and maxNumTerms.
   */
  public boolean equals(Object obj)
  {
    if (this == obj) {
      return true;
    }
    if (obj == null) {
      return false;
    }
    if (getClass() != obj.getClass()) {
      return false;
    }
    // Compare against the OTHER instance (the previous code compared every
    // field with itself).
    FuzzyLikeThisQuery other = (FuzzyLikeThisQuery)obj;
    if (analyzer == null ? other.analyzer != null : !analyzer.equals(other.analyzer)) {
      return false;
    }
    if (fieldVals == null ? other.fieldVals != null : !fieldVals.equals(other.fieldVals)) {
      return false;
    }
    if (ignoreTF != other.ignoreTF) {
      return false;
    }
    if (maxNumTerms != other.maxNumTerms) {
      return false;
    }
    return true;
  }

  /**
   * @param maxNumTerms capacity of the queue of variant terms kept for the
   *                    rewritten query
   * @param analyzer    analyzer used to tokenize the registered text
   */
  public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer)
  {
    q = new ScoreTermQueue(maxNumTerms);
    this.analyzer = analyzer;
    this.maxNumTerms = maxNumTerms;
  }

  /** The text and fuzzy-matching settings registered for one field. */
  class FieldVals
  {
    String queryString;
    String fieldName;
    float minSimilarity;
    int prefixLength;

    public FieldVals(String name, float similarity, int length, String queryString)
    {
      fieldName = name;
      minSimilarity = similarity;
      prefixLength = length;
      this.queryString = queryString;
    }

    public int hashCode()
    {
      final int prime = 31;
      int result = 1;
      result = prime * result + (fieldName == null ? 0 : fieldName.hashCode());
      result = prime * result + Float.floatToIntBits(minSimilarity);
      result = prime * result + prefixLength;
      result = prime * result + (queryString == null ? 0 : queryString.hashCode());
      return result;
    }

    public boolean equals(Object obj)
    {
      if (this == obj) {
        return true;
      }
      if (obj == null) {
        return false;
      }
      if (getClass() != obj.getClass()) {
        return false;
      }
      // Compare against the OTHER instance, not against this one.
      FieldVals other = (FieldVals)obj;
      if (fieldName == null ? other.fieldName != null : !fieldName.equals(other.fieldName)) {
        return false;
      }
      if (Float.floatToIntBits(minSimilarity) != Float.floatToIntBits(other.minSimilarity)) {
        return false;
      }
      if (prefixLength != other.prefixLength) {
        return false;
      }
      if (queryString == null ? other.queryString != null : !queryString.equals(other.queryString)) {
        return false;
      }
      return true;
    }
  }

  /**
   * Registers text to be expanded for one field on the next rewrite.
   *
   * @param queryString   the text to analyze; a null value is skipped at
   *                      rewrite time
   * @param fieldName     the field the terms are looked up in
   * @param minSimilarity passed to the FuzzyTermEnum
   * @param prefixLength  passed to the FuzzyTermEnum
   */
  public void addTerms(String queryString, String fieldName, float minSimilarity, int prefixLength)
  {
    fieldVals.add(new FieldVals(fieldName, minSimilarity, prefixLength, queryString));
  }

  /**
   * Analyzes the text of one FieldVals entry and feeds the fuzzy variants of
   * every distinct token into the shared queue.
   */
  private void addTerms(IndexReader reader, FieldVals f)
    throws IOException
  {
    if (f.queryString == null) {
      return;
    }
    TokenStream ts = analyzer.tokenStream(f.fieldName, new StringReader(f.queryString));
    TermAttribute termAtt = (TermAttribute)ts.addAttribute(TermAttribute.class);

    int corpusNumDocs = reader.numDocs();
    // Template from which the per-token terms of this field are created.
    Term internSavingTemplateTerm = new Term(f.fieldName);
    HashSet processedTerms = new HashSet();
    while (ts.incrementToken())
    {
      String term = termAtt.term();
      // add() returns false for a token that was already expanded.
      if (processedTerms.add(term)) {
        addVariants(reader, f, internSavingTemplateTerm.createTerm(term), corpusNumDocs);
      }
    }
  }

  /**
   * Expands one source term: collects up to MAX_VARIANTS_PER_TERM fuzzy
   * variants ranked by their difference score, then re-scores each kept
   * variant as score * score * idf and inserts it into the shared queue.
   */
  private void addVariants(IndexReader reader, FieldVals f, Term startTerm, int corpusNumDocs)
    throws IOException
  {
    ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM);
    float minScore = 0.0F;
    int df = 0;
    int numVariants = 0;
    int totalVariantDocFreqs = 0;

    FuzzyTermEnum fe = new FuzzyTermEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
    try
    {
      TermEnum origEnum = reader.terms(startTerm);
      try
      {
        // Use the document frequency of the exact term when it exists, so
        // that all of its variants share the same idf.
        if (startTerm.equals(origEnum.term())) {
          df = origEnum.docFreq();
        }
      }
      finally
      {
        origEnum.close();
      }
      do
      {
        Term possibleMatch = fe.term();
        if (possibleMatch != null)
        {
          numVariants++;
          totalVariantDocFreqs += fe.docFreq();
          float score = fe.difference();
          if ((variantsQ.size() < MAX_VARIANTS_PER_TERM) || (score > minScore))
          {
            variantsQ.insert(new ScoreTerm(possibleMatch, score, startTerm));
            // Track the lowest score currently held by the queue.
            minScore = ((ScoreTerm)variantsQ.top()).score;
          }
        }
      } while (fe.next());
    }
    finally
    {
      fe.close();
    }

    if (numVariants > 0)
    {
      if (df == 0) {
        // No exact match: fall back to the average frequency of the variants.
        df = totalVariantDocFreqs / numVariants;
      }
      int size = variantsQ.size();
      for (int i = 0; i < size; i++)
      {
        ScoreTerm st = (ScoreTerm)variantsQ.pop();
        st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs);
        q.insert(st);
      }
    }
  }

  /**
   * Expands all registered text and builds the resulting BooleanQuery. The
   * result is cached; later calls return the cached query.
   *
   * @param reader the index used to look up fuzzy variants
   * @return the rewritten query, carrying this query's boost
   * @throws IOException if the index cannot be read
   */
  public Query rewrite(IndexReader reader)
    throws IOException
  {
    if (rewrittenQuery != null) {
      return rewrittenQuery;
    }
    for (Iterator iter = fieldVals.iterator(); iter.hasNext();)
    {
      addTerms(reader, (FieldVals)iter.next());
    }
    fieldVals.clear();

    BooleanQuery bq = new BooleanQuery();

    // Group the surviving variants by the source term they were derived from.
    HashMap variantQueries = new HashMap();
    int size = q.size();
    for (int i = 0; i < size; i++)
    {
      ScoreTerm st = (ScoreTerm)q.pop();
      ArrayList l = (ArrayList)variantQueries.get(st.fuzziedSourceTerm);
      if (l == null)
      {
        l = new ArrayList();
        variantQueries.put(st.fuzziedSourceTerm, l);
      }
      l.add(st);
    }

    for (Iterator iter = variantQueries.values().iterator(); iter.hasNext();)
    {
      ArrayList variants = (ArrayList)iter.next();
      if (variants.size() == 1)
      {
        // A single variant is added directly as a SHOULD clause.
        bq.add(newVariantQuery((ScoreTerm)variants.get(0)), BooleanClause.Occur.SHOULD);
      }
      else
      {
        // Several variants of one source term share a nested boolean query.
        BooleanQuery termVariants = new BooleanQuery(true);
        for (Iterator iterator2 = variants.iterator(); iterator2.hasNext();)
        {
          termVariants.add(newVariantQuery((ScoreTerm)iterator2.next()), BooleanClause.Occur.SHOULD);
        }
        bq.add(termVariants, BooleanClause.Occur.SHOULD);
      }
    }
    bq.setBoost(getBoost());
    rewrittenQuery = bq;
    return bq;
  }

  /** Creates the term query for one variant, boosted by the variant's score. */
  private TermQuery newVariantQuery(ScoreTerm st)
  {
    TermQuery tq = new FuzzyTermQuery(st.term, ignoreTF);
    tq.setBoost(st.score);
    return tq;
  }

  /** Holder for a variant term, its score and the term it was derived from. */
  private static class ScoreTerm
  {
    public Term term;
    public float score;
    Term fuzziedSourceTerm;

    public ScoreTerm(Term term, float score, Term fuzziedSourceTerm)
    {
      this.term = term;
      this.score = score;
      this.fuzziedSourceTerm = fuzziedSourceTerm;
    }
  }

  /** Priority queue of ScoreTerm objects ordered by score, ties broken by term order. */
  private static class ScoreTermQueue
    extends PriorityQueue
  {
    public ScoreTermQueue(int size)
    {
      initialize(size);
    }

    protected boolean lessThan(Object a, Object b)
    {
      FuzzyLikeThisQuery.ScoreTerm termA = (FuzzyLikeThisQuery.ScoreTerm)a;
      FuzzyLikeThisQuery.ScoreTerm termB = (FuzzyLikeThisQuery.ScoreTerm)b;
      // The listing had lost the termA./termB. qualifiers ("score == score").
      if (termA.score == termB.score) {
        return termA.term.compareTo(termB.term) > 0;
      }
      return termA.score < termB.score;
    }
  }

  /**
   * TermQuery whose similarity always reports an idf of 1.0 and, when
   * ignoreTF is set, a tf of 1.0 as well.
   */
  private static class FuzzyTermQuery
    extends TermQuery
  {
    boolean ignoreTF;

    public FuzzyTermQuery(Term t, boolean ignoreTF)
    {
      // Pass the term on to TermQuery (the listing called super() without it).
      super(t);
      this.ignoreTF = ignoreTF;
    }

    public Similarity getSimilarity(Searcher searcher)
    {
      Similarity result = super.getSimilarity(searcher);
      result = new SimilarityDelegator(result)
      {
        public float tf(float freq)
        {
          if (ignoreTF) {
            return 1.0F;
          }
          return super.tf(freq);
        }

        public float idf(int docFreq, int numDocs)
        {
          return 1.0F;
        }
      };
      return result;
    }
  }

  /** Returns null: this query does not implement a textual form. */
  public String toString(String field)
  {
    return null;
  }

  public boolean isIgnoreTF()
  {
    return ignoreTF;
  }

  public void setIgnoreTF(boolean ignoreTF)
  {
    this.ignoreTF = ignoreTF;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.FuzzyLikeThisQuery
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.BitSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.OpenBitSet;

/**
 * A {@link Filter} that accepts every document containing at least one of a
 * set of terms. Terms are kept in a sorted set.
 */
public class TermsFilter
  extends Filter
{
  Set terms = new TreeSet();

  /** Adds a term to the set of accepted terms. */
  public void addTerm(Term term)
  {
    terms.add(term);
  }

  /**
   * Computes the accepted documents as a java.util.BitSet.
   *
   * @param reader the index to look the terms up in
   * @return a bit set with one bit set per document listed for any of the terms
   * @throws IOException if the index cannot be read
   */
  public BitSet bits(IndexReader reader)
    throws IOException
  {
    BitSet result = new BitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try
    {
      // The iterator is declared here (the listing declared it inside the
      // finally block, after its use).
      for (Iterator iter = terms.iterator(); iter.hasNext();)
      {
        Term term = (Term)iter.next();
        td.seek(term);
        while (td.next()) {
          result.set(td.doc());
        }
      }
    }
    finally
    {
      td.close();
    }
    return result;
  }

  /**
   * Computes the accepted documents as an OpenBitSet.
   *
   * @param reader the index to look the terms up in
   * @return a bit set with one bit set per document listed for any of the terms
   * @throws IOException if the index cannot be read
   */
  public DocIdSet getDocIdSet(IndexReader reader)
    throws IOException
  {
    OpenBitSet result = new OpenBitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try
    {
      for (Iterator iter = terms.iterator(); iter.hasNext();)
      {
        Term term = (Term)iter.next();
        td.seek(term);
        while (td.next()) {
          result.set(td.doc());
        }
      }
    }
    finally
    {
      td.close();
    }
    return result;
  }

  /** Two filters are equal when they are of the same class and hold equal term sets. */
  public boolean equals(Object obj)
  {
    if (this == obj) {
      return true;
    }
    if ((obj == null) || (obj.getClass() != getClass())) {
      return false;
    }
    // Compare against the OTHER filter's terms (the previous code compared
    // the set with itself, which made any two filters equal).
    TermsFilter test = (TermsFilter)obj;
    return (terms == test.terms) || ((terms != null) && (terms.equals(test.terms)));
  }

  /** Hash over the terms in iteration order, consistent with {@link #equals(Object)}. */
  public int hashCode()
  {
    int hash = 9;
    for (Iterator iter = terms.iterator(); iter.hasNext();)
    {
      Term term = (Term)iter.next();
      hash = 31 * hash + term.hashCode();
    }
    return hash;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.TermsFilter
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.similar;

import org.apache.lucene.util.PriorityQueue;

/**
 * Decompiled stand-alone rendering of MoreLikeThis.FreqQ: a priority queue
 * whose elements are Object arrays carrying a Float at index 2. The element
 * with the larger Float is treated as the lesser one.
 */
class MoreLikeThis$FreqQ
  extends PriorityQueue
{
  MoreLikeThis$FreqQ(int s) {
    initialize(s);
  }

  /** Returns true when the Float at index 2 of a is greater than that of b. */
  protected boolean lessThan(Object a, Object b) {
    // Casts are evaluated in the same order as before: both arrays first,
    // then both Float elements.
    Object[] left = (Object[])a;
    Object[] right = (Object[])b;
    float leftScore = ((Float)left[2]).floatValue();
    float rightScore = ((Float)right[2]).floatValue();
    return leftScore > rightScore;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.similar.MoreLikeThis.FreqQ
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.similar;

/**
 * Decompiled stand-alone rendering of MoreLikeThis.Int: a mutable int holder
 * whose value starts at 1.
 */
class MoreLikeThis$Int
{
  // Initialised to 1 for every new instance.
  int x = 1;

  MoreLikeThis$Int() {
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.similar.MoreLikeThis.Int
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.similar;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.PriorityQueue;

public final class MoreLikeThis
{
  /** Initial value of maxNumTokensParsed. */
  public static final int DEFAULT_MAX_NUM_TOKENS_PARSED = 5000;
  /** Analyzer every new instance starts with. */
  public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer();
  /** Initial value of minTermFreq. */
  public static final int DEFAULT_MIN_TERM_FREQ = 2;
  /** Initial value of minDocFreq. */
  public static final int DEFAULT_MIN_DOC_FREQ = 5;
  /** Initial value of boost. */
  public static final boolean DEFAULT_BOOST = false;
  /** Field names every new instance starts with. */
  public static final String[] DEFAULT_FIELD_NAMES = { "contents" };
  /** Initial value of minWordLen. */
  public static final int DEFAULT_MIN_WORD_LENGTH = 0;
  /** Initial value of maxWordLen. */
  public static final int DEFAULT_MAX_WORD_LENGTH = 0;
  /** Initial value of stopWords (no stop-word set). */
  public static final Set DEFAULT_STOP_WORDS = null;
  // Stop-word set exposed through getStopWords/setStopWords.
  private Set stopWords = DEFAULT_STOP_WORDS;
  /** Initial value of maxQueryTerms. */
  public static final int DEFAULT_MAX_QUERY_TERMS = 25;
  // Analyzer exposed through getAnalyzer/setAnalyzer.
  private Analyzer analyzer = DEFAULT_ANALYZER;
  // createQueue skips words whose frequency is below this, unless the value is <= 0.
  private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
  // createQueue skips words whose best document frequency is below this, unless the value is <= 0.
  private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
  // When true, createQuery sets a boost on every generated TermQuery.
  private boolean boost = DEFAULT_BOOST;
  // Fields consulted by createQueue; like(int) and like(File) fill this in from the reader when it is null.
  private String[] fieldNames = DEFAULT_FIELD_NAMES;
  // Exposed through getMaxNumTokensParsed/setMaxNumTokensParsed.
  private int maxNumTokensParsed = DEFAULT_MAX_NUM_TOKENS_PARSED;
  // Exposed through getMinWordLen/setMinWordLen.
  private int minWordLen = DEFAULT_MIN_WORD_LENGTH;
  // Exposed through getMaxWordLen/setMaxWordLen.
  private int maxWordLen = DEFAULT_MAX_WORD_LENGTH;
  // createQuery stops adding clauses once this many were added, when the value is > 0.
  private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
  // Supplies idf(docFreq, numDocs) in createQueue.
  private Similarity similarity;
  // Index that document frequencies and field names are read from.
  private final IndexReader ir;
  // Multiplier applied to each term boost in createQuery.
  private float boostFactor = 1.0F;
  
  /**
   * Returns the multiplier that createQuery applies to each term boost.
   *
   * @return the current boost factor (1.0 unless changed)
   */
  public float getBoostFactor()
  {
    return this.boostFactor;
  }
  
  /**
   * Sets the multiplier that createQuery applies to each term boost.
   * It only has an effect while boosting is enabled (see setBoost).
   *
   * @param boostFactor new multiplier; stored as given, no validation
   */
  public void setBoostFactor(float boostFactor)
  {
    this.boostFactor = boostFactor;
  }
  
  /**
   * Creates an instance that reads from the given index and scores terms
   * with a freshly created DefaultSimilarity.
   *
   * @param ir reader passed on to the two-argument constructor
   */
  public MoreLikeThis(IndexReader ir)
  {
    this(ir, new DefaultSimilarity());
  }
  
  /**
   * Creates an instance bound to the given index reader and similarity.
   *
   * @param ir  reader stored for later docFreq/numDocs/getFieldNames lookups
   * @param sim similarity stored for idf computation; may be replaced via setSimilarity
   */
  public MoreLikeThis(IndexReader ir, Similarity sim)
  {
    this.similarity = sim;
    this.ir = ir;
  }
  
  /**
   * Returns the similarity that createQueue uses to compute idf values.
   *
   * @return the current Similarity instance
   */
  public Similarity getSimilarity()
  {
    return this.similarity;
  }
  
  /**
   * Replaces the similarity that createQueue uses to compute idf values.
   *
   * @param similarity new Similarity; stored as given, no null check
   */
  public void setSimilarity(Similarity similarity)
  {
    this.similarity = similarity;
  }
  
  /**
   * Returns the configured analyzer (DEFAULT_ANALYZER unless changed).
   *
   * @return the current Analyzer
   */
  public Analyzer getAnalyzer()
  {
    return this.analyzer;
  }
  
  /**
   * Replaces the configured analyzer.
   *
   * @param analyzer new Analyzer; stored as given, no null check
   */
  public void setAnalyzer(Analyzer analyzer)
  {
    this.analyzer = analyzer;
  }
  
  /**
   * Returns the term-frequency threshold applied by createQueue.
   *
   * @return the current minimum term frequency (2 unless changed)
   */
  public int getMinTermFreq()
  {
    return this.minTermFreq;
  }
  
  /**
   * Sets the term-frequency threshold. createQueue skips words whose
   * frequency is below it; a value of zero or less disables the check.
   *
   * @param minTermFreq new threshold; stored as given
   */
  public void setMinTermFreq(int minTermFreq)
  {
    this.minTermFreq = minTermFreq;
  }
  
  /**
   * Returns the document-frequency threshold applied by createQueue.
   *
   * @return the current minimum document frequency (5 unless changed)
   */
  public int getMinDocFreq()
  {
    return this.minDocFreq;
  }
  
  /**
   * Sets the document-frequency threshold. createQueue skips words whose
   * highest per-field document frequency is below it; a value of zero or
   * less disables the check (words with document frequency 0 are still skipped).
   *
   * @param minDocFreq new threshold; stored as given
   */
  public void setMinDocFreq(int minDocFreq)
  {
    this.minDocFreq = minDocFreq;
  }
  
  /**
   * Tells whether createQuery sets a boost on the term queries it generates.
   *
   * @return true when boosting is enabled (false unless changed)
   */
  public boolean isBoost()
  {
    return this.boost;
  }
  
  /**
   * Enables or disables boosting. When enabled, createQuery sets each term's
   * boost to boostFactor * score / score-of-first-popped-term.
   *
   * @param boost true to enable boosting
   */
  public void setBoost(boolean boost)
  {
    this.boost = boost;
  }
  
  /**
   * Returns the field-name array currently in use. The internal array itself
   * is returned, not a copy; it may be null if setFieldNames(null) was called.
   *
   * @return the current field names
   */
  public String[] getFieldNames()
  {
    return this.fieldNames;
  }
  
  /**
   * Replaces the field-name array; the array is stored as given, not copied.
   * like(int) and like(File) treat null as "not yet resolved" and fill the
   * array in from the reader's indexed field names on their next call.
   *
   * @param fieldNames new field names, or null
   */
  public void setFieldNames(String[] fieldNames)
  {
    this.fieldNames = fieldNames;
  }
  
  /**
   * Returns the configured minimum word length.
   *
   * @return the current value (0 unless changed)
   */
  public int getMinWordLen()
  {
    return this.minWordLen;
  }
  
  /**
   * Sets the configured minimum word length.
   *
   * @param minWordLen new value; stored as given, no validation
   */
  public void setMinWordLen(int minWordLen)
  {
    this.minWordLen = minWordLen;
  }
  
  /**
   * Returns the configured maximum word length.
   *
   * @return the current value (0 unless changed)
   */
  public int getMaxWordLen()
  {
    return this.maxWordLen;
  }
  
  /**
   * Sets the configured maximum word length.
   *
   * @param maxWordLen new value; stored as given, no validation
   */
  public void setMaxWordLen(int maxWordLen)
  {
    this.maxWordLen = maxWordLen;
  }
  
  /**
   * Replaces the stop-word set; the set is stored as given, not copied.
   *
   * @param stopWords new set, or null (the initial value is null)
   */
  public void setStopWords(Set stopWords)
  {
    this.stopWords = stopWords;
  }
  
  /**
   * Returns the stop-word set. The internal set itself is returned, not a copy.
   *
   * @return the current set, or null when none has been configured
   */
  public Set getStopWords()
  {
    return this.stopWords;
  }
  
  /**
   * Returns the cap on the number of clauses createQuery adds.
   *
   * @return the current cap (25 unless changed)
   */
  public int getMaxQueryTerms()
  {
    return this.maxQueryTerms;
  }
  
  /**
   * Sets the cap on the number of clauses createQuery adds. The cap is only
   * enforced when it is greater than zero.
   *
   * @param maxQueryTerms new cap; stored as given
   */
  public void setMaxQueryTerms(int maxQueryTerms)
  {
    this.maxQueryTerms = maxQueryTerms;
  }
  
  /**
   * Returns the configured maximum number of tokens to parse.
   *
   * @return the current value (5000 unless changed)
   */
  public int getMaxNumTokensParsed()
  {
    return this.maxNumTokensParsed;
  }
  
  /**
   * Sets the configured maximum number of tokens to parse.
   *
   * @param i new value; stored as given, no validation
   */
  public void setMaxNumTokensParsed(int i)
  {
    this.maxNumTokensParsed = i;
  }
  
  /**
   * Builds a query from the terms of a document that is already in the index.
   * If no field names are configured (fieldNames is null) they are first
   * resolved to all indexed field names reported by the reader, and that
   * result is kept for later calls.
   *
   * @param docNum document number handed to retrieveTerms
   * @return the query produced by createQuery from the retrieved terms
   * @throws IOException declared by this method; propagated from the reader or term retrieval
   */
  public Query like(int docNum)
    throws IOException
  {
    if (fieldNames == null)
    {
      Collection indexed = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
      String[] names = new String[indexed.size()];
      fieldNames = ((String[])indexed.toArray(names));
    }
    PriorityQueue terms = retrieveTerms(docNum);
    return createQuery(terms);
  }
  
  /**
   * Builds a query from the contents of a file. If no field names are
   * configured (fieldNames is null) they are first resolved to all indexed
   * field names reported by the reader.
   *
   * The FileReader opened here is closed before returning, whether or not
   * query generation succeeds; previously it was left open.
   *
   * @param f file to read; opened with FileReader (platform default charset)
   * @return the query produced by like(Reader)
   * @throws IOException if the file cannot be opened, read or closed
   */
  public Query like(File f)
    throws IOException
  {
    if (fieldNames == null)
    {
      Collection fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
      fieldNames = ((String[])fields.toArray(new String[fields.size()]));
    }
    Reader reader = new FileReader(f);
    try
    {
      // createQuery drains the term queue eagerly, so nothing reads from the
      // reader after like(Reader) returns.
      // NOTE(review): retrieveTerms(Reader) is outside this section; assumed
      // to finish consuming the reader before it returns its queue.
      return like(reader);
    }
    finally
    {
      reader.close();
    }
  }
  
  /**
   * Builds a query from the content served at a URL.
   *
   * The stream obtained from the connection is closed before returning,
   * whether or not query generation succeeds; previously it was left open.
   *
   * @param u URL whose connection input stream is read (platform default charset)
   * @return the query produced by like(Reader)
   * @throws IOException if the connection cannot be opened, read or closed
   */
  public Query like(URL u)
    throws IOException
  {
    InputStream in = u.openConnection().getInputStream();
    try
    {
      return like(new InputStreamReader(in));
    }
    finally
    {
      // This method opened the stream, so it is responsible for releasing it.
      in.close();
    }
  }
  
  /**
   * Builds a query from the bytes of a caller-supplied stream, decoded with
   * InputStreamReader's default charset. The stream is not closed here.
   *
   * @param is stream to read
   * @return the query produced by like(Reader)
   * @throws IOException propagated from like(Reader)
   */
  public Query like(InputStream is)
    throws IOException
  {
    Reader decoded = new InputStreamReader(is);
    return like(decoded);
  }
  
  /**
   * Builds a query from the text supplied by a reader. The reader is not
   * closed here.
   *
   * If no field names are configured (fieldNames is null) they are first
   * resolved to all indexed field names reported by the index reader, exactly
   * as like(int) and like(File) do. Without this, the URL, InputStream and
   * Reader entry points failed with a NullPointerException in that state,
   * because createQueue dereferences fieldNames unconditionally.
   *
   * @param r source text
   * @return the query produced by createQuery from the retrieved terms
   * @throws IOException propagated from the index reader or term retrieval
   */
  public Query like(Reader r)
    throws IOException
  {
    if (fieldNames == null)
    {
      Collection fields = ir.getFieldNames(IndexReader.FieldOption.INDEXED);
      fieldNames = ((String[])fields.toArray(new String[fields.size()]));
    }
    return createQuery(retrieveTerms(r));
  }
  
  /**
   * Turns a queue of term entries into a BooleanQuery of SHOULD term clauses.
   *
   * Each popped entry is an Object[] with the word at index 0, the field name
   * at index 1 and the score (Float) at index 2. Entries are consumed until
   * the queue is empty, maxQueryTerms clauses were added (when that limit is
   * greater than zero), or the query refuses further clauses.
   *
   * @param q queue to drain; entries are removed with pop()
   * @return the assembled query
   */
  private Query createQuery(PriorityQueue q)
  {
    BooleanQuery result = new BooleanQuery();
    float topScore = 0.0F;
    int added = 0;
    
    for (Object entry = q.pop(); entry != null; entry = q.pop())
    {
      Object[] parts = (Object[])entry;
      Term term = new Term((String)parts[1], (String)parts[0]);
      TermQuery termQuery = new TermQuery(term);
      if (boost)
      {
        float score = ((Float)parts[2]).floatValue();
        // The first entry popped provides the score all boosts are scaled by.
        if (added == 0) {
          topScore = score;
        }
        termQuery.setBoost(boostFactor * score / topScore);
      }
      try
      {
        result.add(termQuery, BooleanClause.Occur.SHOULD);
      }
      catch (BooleanQuery.TooManyClauses ignore)
      {
        // The query is full: keep the clauses added so far and stop.
        break;
      }
      added++;
      if ((maxQueryTerms > 0) && (added >= maxQueryTerms)) {
        break;
      }
    }
    return result;
  }
  
  /**
   * Builds a score-ordered queue from a word-to-counter map.
   *
   * For every word whose counter passes the minTermFreq check, the field with
   * the highest document frequency for that word is picked; if that frequency
   * is non-zero and passes the minDocFreq check, an entry
   * { word, field, score, idf, docFreq, tf } is inserted, where
   * score = tf * idf and idf comes from the configured Similarity.
   *
   * @param words map from word (String) to its Int occurrence counter
   * @return queue holding one Object[] entry per accepted word
   * @throws IOException propagated from the index reader's docFreq lookups
   */
  private PriorityQueue createQueue(Map words)
    throws IOException
  {
    int numDocs = ir.numDocs();
    FreqQ res = new FreqQ(words.size());
    
    Iterator it = words.keySet().iterator();
    while (it.hasNext())
    {
      String word = (String)it.next();
      
      // The decompiled listing had the undefined name "getx" here; the term
      // frequency is the x field of the counter stored under this word.
      // NOTE(review): the nested counter type is referred to by its simple
      // name, the same way FreqQ is above - confirm against the build layout.
      int tf = ((Int)words.get(word)).x;
      if ((minTermFreq <= 0) || (tf >= minTermFreq))
      {
        // Pick the field in which the word occurs in the most documents.
        String topField = fieldNames[0];
        int docFreq = 0;
        for (int i = 0; i < fieldNames.length; i++)
        {
          int freq = ir.docFreq(new Term(fieldNames[i], word));
          topField = freq > docFreq ? fieldNames[i] : topField;
          docFreq = freq > docFreq ? freq : docFreq;
        }
        // Words that occur in no document are skipped even when the
        // minDocFreq check is disabled.
        if (((minDocFreq <= 0) || (docFreq >= minDocFreq)) && 
          (docFreq != 0))
        {
          float idf = similarity.idf(docFreq, numDocs);
          float score = tf * idf;
          
          res.insert(new Object[] { word, topField, new Float(score), new Float(idf), new Integer(docFreq), new Integer(tf) });
        }
      }
    }
    return res;
  }
  
  /**
   * Returns a multi-line, tab-indented summary of the query-generation
   * settings: maxQueryTerms, minWordLen, maxWordLen, fieldNames (comma
   * separated), boost, minTermFreq and minDocFreq.
   *
   * A null fieldNames array (field names not yet resolved) is rendered as an
   * empty list instead of raising a NullPointerException.
   *
   * @return the formatted description, one setting per line
   */
  public String describeParams()
  {
    StringBuffer sb = new StringBuffer();
    sb.append("\tmaxQueryTerms  : ").append(maxQueryTerms).append("\n");
    sb.append("\tminWordLen     : ").append(minWordLen).append("\n");
    sb.append("\tmaxWordLen     : ").append(maxWordLen).append("\n");
    sb.append("\tfieldNames     : ");
    // fieldNames may legitimately be null until like(int)/like(File) resolves it.
    if (fieldNames != null)
    {
      for (int i = 0; i < fieldNames.length; i++)
      {
        if (i > 0) {
          sb.append(", ");
        }
        sb.append(fieldNames[i]);
      }
    }
    sb.append("\n");
    sb.append("\tboost          : ").append(boost).append("\n");
    sb.append("\tminTermFreq    : ").append(minTermFreq).append("\n");
    sb.append("\tminDocFreq     : ").append(minDocFreq).append("\n");
    return sb.toString();
  }
  
  public static void main(String[] a)
    throws Throwable
  {
    String indexName = "localhost_index";
    String fn = "c:/Program Files/Apache Group/Apache/htdocs/manual/vhosts/index.html.en";
    URL url = null;
    for (int i = 0; i < a.length; i++) {
      if (a[i].equals("-i")) {
        indexName = a[(++i)];
      } else if (a[i].equals("-f")) {
        fn = a[(++i)];
      } else if (a[i].equals("-url")) {
        url = new URL(a[(++i)]);
      }
    }
    PrintStream o = System.out;
    IndexReader r = IndexReader.open(indexName);
    o.println("Open index " + indexName + " which has " + r.numDocs() + " docs");
    
    MoreLikeThis mlt = new MoreLikeThis(r);
    
    o.println("Query generation parameters:");
    o.println(mlt.describeParams());
    o.println();
    
    Query query = null;
    if (url != null)
    {
      o.println("Parsing URL: " + url);
      query = mlt.like(url);
    }
    else if (fn != null)
    {
      o.println("Parsing file: " + fn);
      query = mlt.like(new File(fn));
    }
    o.println("q: " + query);
    o.println();
    IndexSearcher searcher = new IndexSearcher(indexName);
    
    Hits hits = searcher.search(query);
    int len = hits.length();
    o.println("found: " + len + " documents matching");
    o.println();
    for (int i = 0; i < Math.min(25, len); i++)
1 2

Further reading...

For more information on Java 1.5 Tiger, you may find "Java 1.5 Tiger: A Developer's Notebook" by D. Flanagan and B. McLaughlin (O'Reilly) of interest.

New! JAR listings


Copyright 2006-2017. Infinite Loop Ltd