创建索引.
1.lucene下载.
下载地址:.
lucene不同版本之间有不小的差别,这里下载的是lucene 4.3.0.
2.导入jar包
打开eclipse,新建dynamic web project.解压下载的lucene压缩包,依次找到下面几个jar包,加到/WebContent/WEB-INF/lib目录下,然后Add to Build Path:
包名 | 位置 |
--- | --- |
lucene-analyzers-common-4.3.0.jar | lucene-4.3.0/analysis/common |
lucene-analyzers-smartcn-4.3.0.jar | lucene-4.3.0/analysis/smartcn |
lucene-core-4.3.0.jar | lucene-4.3.0/core |
lucene-highlighter-4.3.0.jar | lucene-4.3.0/highlighter |
lucene-queries-4.3.0.jar | lucene-4.3.0/queries |
lucene-queryparser-4.3.0.jar | lucene-4.3.0/queryparser |
3.创建索引
package ac.ucas.lucene;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal example that writes two small documents into a file-system Lucene index.
 *
 * <p>Each document carries three stored fields: {@code id} (a {@link StringField}),
 * {@code content} (a {@link TextField}) and {@code num} (an {@link IntField}).
 */
public class IndexCreate {

    /** Index location used when no path is given on the command line. */
    private static final String DEFAULT_INDEX_PATH = "/Users/yaopan/Documents/eclipseworkspace/test";

    /**
     * Opens (or creates) the index, adds the two sample documents and commits them.
     *
     * @param args optional; {@code args[0]}, when present, overrides the default index directory
     */
    public static void main(String[] args) {
        String indexPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_PATH;

        // Standard analyzer plus writer configuration; CREATE_OR_APPEND is the open mode
        // requested for the index.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(new File(indexPath));

            // isLocked/unlock are static, so call them on the class rather than through a
            // (null) instance reference.
            // NOTE(review): force-unlocking is presumably only safe when no other writer is
            // using this directory - confirm against the IndexWriter.unlock documentation.
            if (IndexWriter.isLocked(directory)) {
                IndexWriter.unlock(directory);
            }

            indexWriter = new IndexWriter(directory, indexWriterConfig);
            indexWriter.addDocument(buildDocument("abcde", "极客学院", 1));
            indexWriter.addDocument(buildDocument("addff", "LUCENE案例", 2));
            indexWriter.commit();

            // Report success only once the commit has actually gone through.
            System.out.println("index ceate complete!");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the writer before the directory it writes to; both are released even
            // when opening or writing failed part-way.
            closeResource(indexWriter);
            closeResource(directory);
        }
    }

    /** Builds one sample document with the three stored fields id, content and num. */
    private static Document buildDocument(String id, String content, int num) {
        Document doc = new Document();
        doc.add(new StringField("id", id, Store.YES));
        doc.add(new TextField("content", content, Store.YES));
        doc.add(new IntField("num", num, Store.YES));
        return doc;
    }

    /** Closes the resource if it was opened, printing (not propagating) any close failure. */
    private static void closeResource(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
4.lucene分词器
{%codeblock lang:java lucene分词器 %}
package ac.ucas.lucene;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

/**
 * Runs the same sample sentence through several Lucene analyzers and prints the tokens
 * each one produces, so their tokenization strategies can be compared side by side.
 */
public class AnalyerStudy {

    /** Sample text fed to every analyzer. */
    private static final String TEXT = "lucene, 全文检索框架";

    /** Field name handed to the analyzer; the original passed the sample text itself here. */
    private static final String FIELD_NAME = "content";

    /**
     * Tokenizes the sample text with the given analyzer and prints the analyzer class
     * followed by its tokens, each terminated by {@code " | "}.
     *
     * @param analyzer the analyzer to demonstrate
     */
    public static void print(Analyzer analyzer) {
        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(TEXT));
            // addAttribute returns the existing attribute or registers it, whereas
            // getAttribute fails when the stream does not already carry it.
            CharTermAttribute term = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();

            System.out.println("分词技术:" + analyzer.getClass());
            while (tokenStream.incrementToken()) {
                System.out.print(term.toString() + " | ");
            }
            // Finish the consumer workflow: end() after the last token, close() in finally.
            tokenStream.end();
            System.out.println("\n");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (tokenStream != null) {
                try {
                    tokenStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints the tokenization of the sample text for each analyzer in turn.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Analyzer[] analyzers = {
            new StandardAnalyzer(Version.LUCENE_43),   // standard analyzer
            new WhitespaceAnalyzer(Version.LUCENE_43), // whitespace analyzer
            new SimpleAnalyzer(Version.LUCENE_43),     // simple analyzer
            new CJKAnalyzer(Version.LUCENE_43),        // CJK bigram analyzer
            new KeywordAnalyzer(),                     // keyword analyzer
            new StopAnalyzer(Version.LUCENE_43),       // stop-word analyzer
        };
        for (Analyzer analyzer : analyzers) {
            print(analyzer);
        }
    }
}
{% endcodeblock %}
分词结果:
分词技术:class org.apache.lucene.analysis.standard.StandardAnalyzer
lucene | 全 | 文 | 检 | 索 | 框 | 架 |

分词技术:class org.apache.lucene.analysis.core.WhitespaceAnalyzer
lucene, | 全文检索框架 |

分词技术:class org.apache.lucene.analysis.core.SimpleAnalyzer
lucene | 全文检索框架 |

分词技术:class org.apache.lucene.analysis.cjk.CJKAnalyzer
lucene | 全文 | 文检 | 检索 | 索框 | 框架 |

分词技术:class org.apache.lucene.analysis.core.KeywordAnalyzer
lucene, 全文检索框架 |

分词技术:class org.apache.lucene.analysis.core.StopAnalyzer
lucene | 全文检索框架 |

5. 使用luke打开索引
Luke是一个用于Lucene搜索引擎的,方便开发和诊断的第三方工具,它可以访问现有Lucene的索引.
luke下载地址: