org.apache.lucene.spellchecker_2.9.1.v20100421-0704

16:41:14.743 INFO  jd.cli.Main - Decompiling org.apache.lucene.spellchecker_2.9.1.v20100421-0704.jar
package org.apache.lucene.search.spell;

import java.util.Iterator;

/**
 * A source of words for the spell checker.
 *
 * Implementations expose their vocabulary through a single iterator.
 */
public interface Dictionary
{
  /**
   * Returns an iterator over the words held by this dictionary.
   *
   * @return an iterator positioned before the first word
   */
  Iterator getWordsIterator();
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.Dictionary
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import java.util.Arrays;

public class JaroWinklerDistance
  implements StringDistance
{
  private float threshold = 0.7F;
  
  private int[] matches(String s1, String s2)
  {
    String min;
    String max;
    String min;
    if (s1.length() > s2.length())
    {
      String max = s1;
      min = s2;
    }
    else
    {
      max = s2;
      min = s1;
    }
    int range = Math.max(max.length() / 2 - 1, 0);
    int[] matchIndexes = new int[min.length()];
    Arrays.fill(matchIndexes, -1);
    boolean[] matchFlags = new boolean[max.length()];
    int matches = 0;
    for (int mi = 0; mi < min.length(); mi++)
    {
      char c1 = min.charAt(mi);
      int xi = Math.max(mi - range, 0);int xn = Math.min(mi + range + 1, max.length());
      for (; xi < xn; xi++) {
        if ((matchFlags[xi] == 0) && (c1 == max.charAt(xi)))
        {
          matchIndexes[mi] = xi;
          matchFlags[xi] = true;
          matches++;
          break;
        }
      }
    }
    char[] ms1 = new char[matches];
    char[] ms2 = new char[matches];
    int i = 0;
    for (int si = 0; i < min.length(); i++) {
      if (matchIndexes[i] != -1)
      {
        ms1[si] = min.charAt(i);
        si++;
      }
    }
    int i = 0;
    for (int si = 0; i < max.length(); i++) {
      if (matchFlags[i] != 0)
      {
        ms2[si] = max.charAt(i);
        si++;
      }
    }
    int transpositions = 0;
    for (int mi = 0; mi < ms1.length; mi++) {
      if (ms1[mi] != ms2[mi]) {
        transpositions++;
      }
    }
    int prefix = 0;
    for (int mi = 0; mi < min.length(); mi++)
    {
      if (s1.charAt(mi) != s2.charAt(mi)) {
        break;
      }
      prefix++;
    }
    return new int[] { matches, transpositions / 2, prefix, max.length() };
  }
  
  public float getDistance(String s1, String s2)
  {
    int[] mtp = matches(s1, s2);
    float m = mtp[0];
    if (m == 0.0F) {
      return 0.0F;
    }
    float j = (m / s1.length() + m / s2.length() + (m - mtp[1]) / m) / 3.0F;
    float jw = j < getThreshold() ? j : j + Math.min(0.1F, 1.0F / mtp[3]) * mtp[2] * (1.0F - j);
    
    return jw;
  }
  
  public void setThreshold(float threshold)
  {
    this.threshold = threshold;
  }
  
  public float getThreshold()
  {
    return threshold;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.JaroWinklerDistance
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

public final class LevensteinDistance
  implements StringDistance
{
  public float getDistance(String target, String other)
  {
    char[] sa = target.toCharArray();
    int n = sa.length;
    int[] p = new int[n + 1];
    int[] d = new int[n + 1];
    
    int m = other.length();
    if ((n == 0) || (m == 0))
    {
      if (n == m) {
        return 1.0F;
      }
      return 0.0F;
    }
    for (int i = 0; i <= n; i++) {
      p[i] = i;
    }
    for (int j = 1; j <= m; j++)
    {
      char t_j = other.charAt(j - 1);
      d[0] = j;
      for (i = 1; i <= n; i++)
      {
        int cost = sa[(i - 1)] == t_j ? 0 : 1;
        
        d[i] = Math.min(Math.min(d[(i - 1)] + 1, p[i] + 1), p[(i - 1)] + cost);
      }
      int[] _d = p;
      p = d;
      d = _d;
    }
    return 1.0F - p[n] / Math.max(other.length(), sa.length);
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.LevensteinDistance
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;

/*
 * Decompiler rendering of the class file for the LuceneIterator class nested
 * in LuceneDictionary. It iterates over the texts of the terms produced by a
 * TermEnum and stops at the first term whose field differs from the
 * dictionary field.
 *
 * NOTE(review): access$000 and access$100 look like compiler generated
 * accessors for LuceneDictionary's "field" and "reader" members (compare the
 * nested class inside LuceneDictionary) - confirm before relying on this.
 */
final class LuceneDictionary$LuceneIterator
  implements Iterator
{
  private TermEnum termEnum;
  private Term actualTerm;
  // True while the term fetched by hasNext() has not yet been consumed by next().
  private boolean hasNextCalled;
  private final LuceneDictionary this$0;
  
  // Opens the term enumeration; an IOException is rethrown as a RuntimeException.
  // NOTE(review): "???" is presumably a decompiler placeholder for the
  // enclosing instance, and this$0 is never assigned in the body as shown.
  LuceneDictionary$LuceneIterator(LuceneDictionary arg1)
  {
    try
    {
      termEnum = LuceneDictionary.access$100(???).terms(new Term(LuceneDictionary.access$000(???)));
    }
    catch (IOException e)
    {
      throw new RuntimeException(e);
    }
  }
  
  // Returns the text of the current term, or null when there is none, and
  // advances the enumeration. An IOException is rethrown as a RuntimeException.
  public Object next()
  {
    if (!hasNextCalled) {
      hasNext();
    }
    hasNextCalled = false;
    try
    {
      termEnum.next();
    }
    catch (IOException e)
    {
      throw new RuntimeException(e);
    }
    return actualTerm != null ? actualTerm.text() : null;
  }
  
  // Fetches the current term once per step; repeated calls before next()
  // reuse the cached answer.
  public boolean hasNext()
  {
    if (hasNextCalled) {
      return actualTerm != null;
    }
    hasNextCalled = true;
    
    actualTerm = termEnum.term();
    if (actualTerm == null) {
      return false;
    }
    String currentField = actualTerm.field();
    // Reference comparison, not equals().
    // NOTE(review): presumably both strings are interned - confirm.
    if (currentField != LuceneDictionary.access$000(this$0))
    {
      actualTerm = null;
      return false;
    }
    return true;
  }
  
  // Removal is not supported.
  public void remove()
  {
    throw new UnsupportedOperationException();
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.LuceneDictionary.LuceneIterator
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.StringHelper;

/**
 * Dictionary whose words are the terms of one field of an index.
 */
public class LuceneDictionary
  implements Dictionary
{
  private IndexReader reader;
  /** Field name, stored in interned form. */
  private String field;
  
  /**
   * Creates a dictionary over one field of an index.
   *
   * @param reader reader the terms are enumerated from
   * @param field name of the field supplying the words
   */
  public LuceneDictionary(IndexReader reader, String field)
  {
    this.reader = reader;
    this.field = StringHelper.intern(field);
  }
  
  /**
   * Returns a new iterator over the term texts of the configured field.
   *
   * @return an iterator yielding each term text as a String
   */
  public final Iterator getWordsIterator()
  {
    return new LuceneIterator();
  }
  
  /**
   * Walks a term enumeration and stops at the first term that belongs to a
   * different field.
   */
  final class LuceneIterator
    implements Iterator
  {
    private TermEnum termEnum;
    private Term actualTerm;
    /** True while the term fetched by hasNext() has not been consumed. */
    private boolean hasNextCalled;
    
    /**
     * Opens the term enumeration; an IOException is rethrown unchecked.
     */
    LuceneIterator()
    {
      try
      {
        termEnum = reader.terms(new Term(field));
      }
      catch (IOException e)
      {
        throw new RuntimeException(e);
      }
    }
    
    /**
     * Returns the text of the current term and advances the enumeration.
     *
     * @return the term text, or null when no term of the field is left
     */
    public Object next()
    {
      if (!hasNextCalled)
      {
        hasNext();
      }
      hasNextCalled = false;
      try
      {
        termEnum.next();
      }
      catch (IOException e)
      {
        throw new RuntimeException(e);
      }
      if (actualTerm == null)
      {
        return null;
      }
      return actualTerm.text();
    }
    
    /**
     * Reports whether another term of the field is available. The current
     * term is fetched at most once between two calls to next().
     *
     * @return true when next() will return a term text
     */
    public boolean hasNext()
    {
      if (!hasNextCalled)
      {
        hasNextCalled = true;
        actualTerm = termEnum.term();
        // Reference comparison against the interned field name.
        // NOTE(review): assumes Term.field() also returns an interned
        // string - confirm.
        if ((actualTerm != null) && (actualTerm.field() != field))
        {
          actualTerm = null;
        }
      }
      return actualTerm != null;
    }
    
    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove()
    {
      throw new UnsupportedOperationException();
    }
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.LuceneDictionary
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

public class NGramDistance
  implements StringDistance
{
  private int n;
  
  public NGramDistance(int size)
  {
    n = size;
  }
  
  public NGramDistance()
  {
    this(2);
  }
  
  public float getDistance(String source, String target)
  {
    int sl = source.length();
    int tl = target.length();
    if ((sl == 0) || (tl == 0))
    {
      if (sl == tl) {
        return 1.0F;
      }
      return 0.0F;
    }
    int cost = 0;
    if ((sl < n) || (tl < n))
    {
      int i = 0;
      for (int ni = Math.min(sl, tl); i < ni; i++) {
        if (source.charAt(i) == target.charAt(i)) {
          cost++;
        }
      }
      return cost / Math.max(sl, tl);
    }
    char[] sa = new char[sl + n - 1];
    for (int i = 0; i < sa.length; i++) {
      if (i < n - 1) {
        sa[i] = '\000';
      } else {
        sa[i] = source.charAt(i - n + 1);
      }
    }
    float[] p = new float[sl + 1];
    float[] d = new float[sl + 1];
    
    char[] t_j = new char[n];
    for (int i = 0; i <= sl; i++) {
      p[i] = i;
    }
    for (int j = 1; j <= tl; j++)
    {
      if (j < n)
      {
        for (int ti = 0; ti < n - j; ti++) {
          t_j[ti] = '\000';
        }
        for (int ti = n - j; ti < n; ti++) {
          t_j[ti] = target.charAt(ti - (n - j));
        }
      }
      else
      {
        t_j = target.substring(j - n, j).toCharArray();
      }
      d[0] = j;
      for (i = 1; i <= sl; i++)
      {
        cost = 0;
        int tn = n;
        for (int ni = 0; ni < n; ni++) {
          if (sa[(i - 1 + ni)] != t_j[ni]) {
            cost++;
          } else if (sa[(i - 1 + ni)] == 0) {
            tn--;
          }
        }
        float ec = cost / tn;
        
        d[i] = Math.min(Math.min(d[(i - 1)] + 1.0F, p[i] + 1.0F), p[(i - 1)] + ec);
      }
      float[] _d = p;
      p = d;
      d = _d;
    }
    return 1.0F - p[sl] / Math.max(tl, sl);
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.NGramDistance
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Iterator;

/*
 * Decompiler rendering of the class file for the fileIterator class nested in
 * PlainTextDictionary. It yields the lines read from the dictionary's reader.
 *
 * NOTE(review): the access$NNN calls look like compiler generated accessors
 * for members of PlainTextDictionary (compare the nested class inside
 * PlainTextDictionary): access$000 / access$002 read / write "hasNextCalled",
 * access$100 / access$102 read / write "line", access$200 reads "in" -
 * confirm before relying on this.
 */
final class PlainTextDictionary$fileIterator
  implements Iterator
{
  private final PlainTextDictionary this$0;
  
  // NOTE(review): the body as shown never assigns this$0; presumably the
  // decompiler dropped that assignment - confirm.
  PlainTextDictionary$fileIterator(PlainTextDictionary paramPlainTextDictionary) {}
  
  // Returns the most recently read line, reading one first when hasNext()
  // has not been called since the previous next().
  public Object next()
  {
    if (!PlainTextDictionary.access$000(this$0)) {
      hasNext();
    }
    PlainTextDictionary.access$002(this$0, false);
    return PlainTextDictionary.access$100(this$0);
  }
  
  // Reads one line on EVERY call, so two calls in a row skip a line.
  // An IOException is rethrown as a RuntimeException.
  public boolean hasNext()
  {
    PlainTextDictionary.access$002(this$0, true);
    try
    {
      PlainTextDictionary.access$102(this$0, PlainTextDictionary.access$200(this$0).readLine());
    }
    catch (IOException ex)
    {
      throw new RuntimeException(ex);
    }
    return PlainTextDictionary.access$100(this$0) != null;
  }
  
  // Removal is not supported.
  public void remove()
  {
    throw new UnsupportedOperationException();
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.PlainTextDictionary.fileIterator
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Iterator;

/**
 * Dictionary backed by plain text, one word per line.
 *
 * The read position and the look-ahead line are kept on the dictionary
 * itself, so every iterator obtained from one instance shares them.
 */
public class PlainTextDictionary
  implements Dictionary
{
  private BufferedReader in;
  /** Line fetched by the most recent read; null once the input is exhausted. */
  private String line;
  /** True while the line fetched by hasNext() has not been consumed by next(). */
  private boolean hasNextCalled;
  
  /**
   * Creates a dictionary that reads its words from a file.
   *
   * @param file the file to read
   * @throws FileNotFoundException if the file cannot be opened
   */
  public PlainTextDictionary(File file)
    throws FileNotFoundException
  {
    in = new BufferedReader(new FileReader(file));
  }
  
  /**
   * Creates a dictionary that reads its words from a stream.
   *
   * @param dictFile the stream to read
   */
  public PlainTextDictionary(InputStream dictFile)
  {
    in = new BufferedReader(new InputStreamReader(dictFile));
  }
  
  /**
   * Creates a dictionary that reads its words from a reader.
   *
   * @param reader the reader to wrap
   */
  public PlainTextDictionary(Reader reader)
  {
    in = new BufferedReader(reader);
  }
  
  /**
   * Returns an iterator over the remaining lines of the input.
   *
   * @return an iterator yielding each line as a String
   */
  public Iterator getWordsIterator()
  {
    return new fileIterator();
  }
  
  /**
   * Iterator that yields the lines of the enclosing dictionary's reader.
   */
  final class fileIterator
    implements Iterator
  {
    fileIterator() {}
    
    /**
     * Returns the next line.
     *
     * @return the line, or null when the input is exhausted
     */
    public Object next()
    {
      if (!hasNextCalled) {
        hasNext();
      }
      hasNextCalled = false;
      return line;
    }
    
    /**
     * Reports whether another line is available. A line is read at most once
     * between two calls to next(), so calling this method repeatedly does
     * not skip words.
     *
     * @return true when next() will return a line
     */
    public boolean hasNext()
    {
      // Reuse the look-ahead line; reading again here would drop a word.
      if (hasNextCalled) {
        return line != null;
      }
      hasNextCalled = true;
      try
      {
        line = in.readLine();
      }
      catch (IOException ex)
      {
        throw new RuntimeException(ex);
      }
      return line != null;
    }
    
    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove()
    {
      throw new UnsupportedOperationException();
    }
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.PlainTextDictionary
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;

public class SpellChecker
{
  public static final String F_WORD = "word";
  Directory spellIndex;
  private float bStart = 2.0F;
  private float bEnd = 1.0F;
  private IndexSearcher searcher;
  private float minScore = 0.5F;
  private StringDistance sd;
  
  public SpellChecker(Directory spellIndex, StringDistance sd)
    throws IOException
  {
    setSpellIndex(spellIndex);
    setStringDistance(sd);
  }
  
  public SpellChecker(Directory spellIndex)
    throws IOException
  {
    this(spellIndex, new LevensteinDistance());
  }
  
  public void setSpellIndex(Directory spellIndex)
    throws IOException
  {
    this.spellIndex = spellIndex;
    if (!IndexReader.indexExists(spellIndex))
    {
      IndexWriter writer = new IndexWriter(spellIndex, null, true);
      writer.close();
    }
    if (searcher != null) {
      searcher.close();
    }
    searcher = new IndexSearcher(this.spellIndex);
  }
  
  public void setStringDistance(StringDistance sd)
  {
    this.sd = sd;
  }
  
  public StringDistance getStringDistance()
  {
    return sd;
  }
  
  public void setAccuracy(float minScore)
  {
    this.minScore = minScore;
  }
  
  public String[] suggestSimilar(String word, int numSug)
    throws IOException
  {
    return suggestSimilar(word, numSug, null, null, false);
  }
  
  public String[] suggestSimilar(String word, int numSug, IndexReader ir, String field, boolean morePopular)
    throws IOException
  {
    float min = minScore;
    int lengthWord = word.length();
    
    int freq = (ir != null) && (field != null) ? ir.docFreq(new Term(field, word)) : 0;
    int goalFreq = (morePopular) && (ir != null) && (field != null) ? freq : 0;
    if ((!morePopular) && (freq > 0)) {
      return new String[] { word };
    }
    BooleanQuery query = new BooleanQuery();
    for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++)
    {
      String key = "gram" + ng;
      
      String[] grams = formGrams(word, ng);
      if (grams.length != 0)
      {
        if (bStart > 0.0F) {
          add(query, "start" + ng, grams[0], bStart);
        }
        if (bEnd > 0.0F) {
          add(query, "end" + ng, grams[(grams.length - 1)], bEnd);
        }
        for (int i = 0; i < grams.length; i++) {
          add(query, key, grams[i]);
        }
      }
    }
    Hits hits = searcher.search(query);
    
    SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
    
    int stop = Math.min(hits.length(), 10 * numSug);
    SuggestWord sugWord = new SuggestWord();
    for (int i = 0; i < stop; i++)
    {
      string = hits.doc(i).get("word");
      if (!string.equals(word))
      {
        score = sd.getDistance(word, string);
        if (score >= min) {
          if ((ir != null) && (field != null))
          {
            freq = ir.docFreq(new Term(field, string));
            if (((morePopular) && (goalFreq > freq)) || (freq < 1)) {}
          }
          else
          {
            sugQueue.insert(sugWord);
            if (sugQueue.size() == numSug) {
              min = topscore;
            }
            sugWord = new SuggestWord();
          }
        }
      }
    }
    String[] list = new String[sugQueue.size()];
    for (int i = sugQueue.size() - 1; i >= 0; i--) {
      list[i] = popstring;
    }
    return list;
  }
  
  private static void add(BooleanQuery q, String name, String value, float boost)
  {
    Query tq = new TermQuery(new Term(name, value));
    tq.setBoost(boost);
    q.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
  }
  
  private static void add(BooleanQuery q, String name, String value)
  {
    q.add(new BooleanClause(new TermQuery(new Term(name, value)), BooleanClause.Occur.SHOULD));
  }
  
  private static String[] formGrams(String text, int ng)
  {
    int len = text.length();
    String[] res = new String[len - ng + 1];
    for (int i = 0; i < len - ng + 1; i++) {
      res[i] = text.substring(i, i + ng);
    }
    return res;
  }
  
  public void clearIndex()
    throws IOException
  {
    IndexWriter writer = new IndexWriter(spellIndex, null, true);
    writer.close();
    
    searcher.close();
    searcher = new IndexSearcher(spellIndex);
  }
  
  public boolean exist(String word)
    throws IOException
  {
    return searcher.docFreq(new Term("word", word)) > 0;
  }
  
  public void indexDictionary(Dictionary dict, int mergeFactor, int ramMB)
    throws IOException
  {
    IndexWriter writer = new IndexWriter(spellIndex, true, new WhitespaceAnalyzer());
    writer.setMergeFactor(mergeFactor);
    writer.setRAMBufferSizeMB(ramMB);
    
    Iterator iter = dict.getWordsIterator();
    while (iter.hasNext())
    {
      String word = (String)iter.next();
      
      int len = word.length();
      if ((len >= 3) && 
      
        (!exist(word)))
      {
        Document doc = createDocument(word, getMin(len), getMax(len));
        writer.addDocument(doc);
      }
    }
    writer.optimize();
    writer.close();
    
    searcher.close();
    searcher = new IndexSearcher(spellIndex);
  }
  
  public void indexDictionary(Dictionary dict)
    throws IOException
  {
    indexDictionary(dict, 300, 10);
  }
  
  private int getMin(int l)
  {
    if (l > 5) {
      return 3;
    }
    if (l == 5) {
      return 2;
    }
    return 1;
  }
  
  private int getMax(int l)
  {
    if (l > 5) {
      return 4;
    }
    if (l == 5) {
      return 3;
    }
    return 2;
  }
  
  private static Document createDocument(String text, int ng1, int ng2)
  {
    Document doc = new Document();
    doc.add(new Field("word", text, Field.Store.YES, Field.Index.NOT_ANALYZED));
    addGram(text, doc, ng1, ng2);
    return doc;
  }
  
  private static void addGram(String text, Document doc, int ng1, int ng2)
  {
    int len = text.length();
    for (int ng = ng1; ng <= ng2; ng++)
    {
      String key = "gram" + ng;
      String end = null;
      for (int i = 0; i < len - ng + 1; i++)
      {
        String gram = text.substring(i, i + ng);
        doc.add(new Field(key, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
        if (i == 0) {
          doc.add(new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
        }
        end = gram;
      }
      if (end != null) {
        doc.add(new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED));
      }
    }
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.SpellChecker
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

/**
 * Measures how alike two strings are.
 */
public interface StringDistance
{
  /**
   * Scores a pair of strings.
   *
   * @param paramString1 first string
   * @param paramString2 second string
   * @return the score assigned to the pair by the implementation
   */
  float getDistance(String paramString1, String paramString2);
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.StringDistance
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

final class SuggestWord
{
  public float score;
  public int freq;
  public String string;
  
  public final int compareTo(SuggestWord a)
  {
    if (score > score) {
      return 1;
    }
    if (score < score) {
      return -1;
    }
    if (freq > freq) {
      return 1;
    }
    if (freq < freq) {
      return -1;
    }
    return 0;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.SuggestWord
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
package org.apache.lucene.search.spell;

import org.apache.lucene.util.PriorityQueue;

/**
 * Priority queue of SuggestWord entries ordered by SuggestWord.compareTo.
 */
final class SuggestWordQueue
  extends PriorityQueue
{
  /**
   * Creates a queue initialised with the given size.
   *
   * @param size value handed to initialize
   */
  SuggestWordQueue(int size)
  {
    initialize(size);
  }
  
  /**
   * Reports whether the first entry ranks below the second.
   *
   * @param a first entry, must be a SuggestWord
   * @param b second entry, must be a SuggestWord
   * @return true when {@code a.compareTo(b)} is negative
   */
  protected final boolean lessThan(Object a, Object b)
  {
    SuggestWord left = (SuggestWord)a;
    SuggestWord right = (SuggestWord)b;
    return left.compareTo(right) < 0;
  }
}

/* Location:
 * Qualified Name:     org.apache.lucene.search.spell.SuggestWordQueue
 * Java Class Version: 1.4 (48.0)
 * JD-Core Version:    0.7.1
 */
1

Further reading...

For more information on Java 1.5 Tiger, you may find "Java 1.5 Tiger: A Developer's Notebook" by D. Flanagan and B. McLaughlin (O'Reilly) of interest.

New! JAR listings


Copyright 2006-2017. Infinite Loop Ltd