Lucene4.3入门
【摘要】 辞职交接期间无聊看了一下搜索引擎,java社区比较火的当然是Lucene,想写一个简单的小例子,在网上找了些资料,不过都不是4.3的,自己看了一下。
下载地址:http://lucene.apache.org/core/
项目结构
Constants.java 是常量类
LuceneIndex.java 建立索引类
...
辞职交接期间无聊看了一下搜索引擎,java社区比较火的当然是Lucene,想写一个简单的小例子,在网上找了些资料,不过都不是4.3的,自己看了一下。
下载地址:http://lucene.apache.org/core/
项目结构
Constants.java 是常量类
LuceneIndex.java 建立索引类
LuceneSearch.java 搜索类
数据文件:
-
package com.xin;
-
-
/**
 * Filesystem locations shared by the indexing and search examples.
 */
public class Constants {

    /** Directory that holds the files to be indexed. */
    public static final String INDEX_FILE_PATH = "e:\\lucene\\test";

    /** Directory in which the index is stored. */
    public static final String INDEX_STORE_PATH = "e:\\lucene\\index";
}
-
package com.xin;
-
-
import java.io.BufferedReader;
-
import java.io.File;
-
import java.io.FileInputStream;
-
import java.io.InputStreamReader;
-
import java.io.Reader;
-
import java.util.Date;
-
-
import org.apache.lucene.analysis.Analyzer;
-
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-
import org.apache.lucene.document.Document;
-
import org.apache.lucene.document.Field;
-
import org.apache.lucene.document.StringField;
-
import org.apache.lucene.document.TextField;
-
import org.apache.lucene.index.IndexWriter;
-
import org.apache.lucene.index.IndexWriterConfig;
-
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-
import org.apache.lucene.store.Directory;
-
import org.apache.lucene.store.FSDirectory;
-
import org.apache.lucene.util.Version;
-
/**
-
* @author chongxin
-
* @since 2013/6/19
-
* @version Lucene 4.3.1
-
* */
-
public class LuceneIndex {
-
// 索引器
-
private IndexWriter writer = null;
-
public LuceneIndex() {
-
try {
-
//索引文件的保存位置
-
Directory dir = FSDirectory.open(new File(Constants.INDEX_STORE_PATH));
-
//分析器
-
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
-
//配置类
-
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,analyzer);
-
iwc.setOpenMode(OpenMode.CREATE);//创建模式 OpenMode.CREATE_OR_APPEND 添加模式
-
-
writer = new IndexWriter(dir, iwc);
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
}
-
-
// 将要建立索引的文件构造成一个Document对象,并添加一个域"content"
-
private Document getDocument(File f) throws Exception {
-
Document doc = new Document();
-
-
FileInputStream is = new FileInputStream(f);
-
Reader reader = new BufferedReader(new InputStreamReader(is));
-
//字符串 StringField LongField TextField
-
Field pathField = new StringField("path", f.getAbsolutePath(),Field.Store.YES);
-
Field contenField = new TextField("contents", reader);
-
//添加字段
-
doc.add(contenField);
-
doc.add(pathField);
-
return doc;
-
}
-
-
public void writeToIndex() throws Exception {
-
File folder = new File(Constants.INDEX_FILE_PATH);
-
-
if (folder.isDirectory()) {
-
String[] files = folder.list();
-
for (int i = 0; i < files.length; i++) {
-
File file = new File(folder, files[i]);
-
Document doc = getDocument(file);
-
System.out.println("正在建立索引 : " + file + "");
-
writer.addDocument(doc);
-
}
-
}
-
}
-
-
public void close() throws Exception {
-
writer.close();
-
}
-
-
public static void main(String[] args) throws Exception {
-
// 声明一个对象
-
LuceneIndex indexer = new LuceneIndex();
-
// 建立索引
-
Date start = new Date();
-
indexer.writeToIndex();
-
Date end = new Date();
-
-
System.out.println("建立索引用时" + (end.getTime() - start.getTime()) + "毫秒");
-
-
indexer.close();
-
}
-
}
-
正在建立索引 : e:\lucene\test\a.txt
-
正在建立索引 : e:\lucene\test\b.txt
-
正在建立索引 : e:\lucene\test\c.txt
-
正在建立索引 : e:\lucene\test\d.txt
-
建立索引用时109毫秒
生成的索引文件:
查找:
-
package com.xin;
-
-
import java.io.File;
-
import java.util.Date;
-
-
import org.apache.lucene.analysis.Analyzer;
-
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-
import org.apache.lucene.document.Document;
-
import org.apache.lucene.index.DirectoryReader;
-
import org.apache.lucene.index.IndexReader;
-
import org.apache.lucene.queryparser.classic.QueryParser;
-
import org.apache.lucene.search.IndexSearcher;
-
import org.apache.lucene.search.Query;
-
import org.apache.lucene.search.ScoreDoc;
-
import org.apache.lucene.search.TopDocs;
-
import org.apache.lucene.store.FSDirectory;
-
import org.apache.lucene.util.Version;
-
-
/**
-
* @author chongxin
-
* @since 2013/6/19
-
* @version Lucene 4.3.1
-
* */
-
public class LuceneSearch {
-
// 声明一个IndexSearcher对象
-
private IndexSearcher searcher = null;
-
// 声明一个Query对象
-
private Query query = null;
-
private String field = "contents";
-
-
public LuceneSearch() {
-
try {
-
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(Constants.INDEX_STORE_PATH)));
-
searcher = new IndexSearcher(reader);
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
}
-
//返回查询结果
-
public final TopDocs search(String keyword) {
-
System.out.println("正在检索关键字 : " + keyword);
-
try {
-
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
-
QueryParser parser = new QueryParser(Version.LUCENE_40, field,analyzer);
-
// 将关键字包装成Query对象
-
query = parser.parse(keyword);
-
Date start = new Date();
-
TopDocs results = searcher.search(query, 5 * 2);
-
Date end = new Date();
-
System.out.println("检索完成,用时" + (end.getTime() - start.getTime())
-
+ "毫秒");
-
return results;
-
} catch (Exception e) {
-
e.printStackTrace();
-
return null;
-
}
-
}
-
//打印结果
-
public void printResult(TopDocs results) {
-
ScoreDoc[] h = results.scoreDocs;
-
if (h.length == 0) {
-
System.out.println("对不起,没有找到您要的结果。");
-
} else {
-
for (int i = 0; i < h.length; i++) {
-
try {
-
Document doc = searcher.doc(h[i].doc);
-
System.out.print("这是第" + i + "个检索到的结果,文件名为:");
-
System.out.println(doc.get("path"));
-
} catch (Exception e) {
-
e.printStackTrace();
-
}
-
}
-
}
-
System.out.println("--------------------------");
-
}
-
-
public static void main(String[] args) throws Exception {
-
LuceneSearch test = new LuceneSearch();
-
TopDocs h = null;
-
h = test.search("中国");
-
test.printResult(h);
-
h = test.search("人民");
-
test.printResult(h);
-
h = test.search("共和国");
-
test.printResult(h);
-
}
-
-
}
文章来源: gamwatcher.blog.csdn.net,作者:香菜聊游戏,版权归原作者所有,如需转载,请联系作者。
原文链接:gamwatcher.blog.csdn.net/article/details/9128323
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)