lucene 5.5.1使用
【摘要】 依赖的 jar 包:lucene-analyzers-common-5.5.1.jar、lucene-core-5.5.1.jar、lucene-queries-5.5.1.jar、lucene-queryparser-5.5.1.jar。创建索引:public static void creatMysqlIndex(String configPath, Boolean type) { try { reader.close(...
lucene-analyzers-common-5.5.1.jar
lucene-core-5.5.1.jar
lucene-queries-5.5.1.jar
lucene-queryparser-5.5.1.jar
创建索引
public static void creatMysqlIndex(String configPath, Boolean type) { try { reader.close(); } catch (Exception e) { // TODO Auto-generated catch block log4j.error(e.getMessage()); } Directory directory = null; IndexWriter indexWriter = null; String sql = ""; try { File indexFile = new File(configPath+"caseIndex/mysql"); if (!indexFile.exists()) { indexFile.mkdir(); } directory = FSDirectory.open(Paths.get(configPath+"caseIndex/mysql")); IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setMaxBufferedDocs(500); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); indexWriter = new IndexWriter(directory,config);//, type TODO:Conform //indexWriter.MaxFieldLength(5000); Document doc = null; //==================== sql = "select * from tbl_hotq where question is not null and extern = 0"; ApplicationContext ac = SpringUtil.getApplicationContext(); UserDAOImpl dao = (UserDAOImpl)ac.getBean("userDao"); List<HashMap<String,String>> ret = dao.query(sql); for(Iterator<HashMap<String,String>> it=ret.iterator();it.hasNext();){ HashMap<String,String> value=it.next(); doc = new Document(); String ct = value.get("question") == "null" ? "" : value.get("question");//TODO:confirm null "null" TextField case_title = new TextField("case_title",ct, Field.Store.YES); String lid = trimHotQ(ct).toLowerCase(); StringField case_id1 = new StringField("case_id",lid,Field.Store.YES); SortedDocValuesField case_id = new SortedDocValuesField("case_id",new BytesRef(lid));//TODO:confirm fix '?' 
fix TextField case_keyword = new TextField("case_keyword","", Field.Store.YES); TextField case_contents = new TextField("case_contents","", Field.Store.YES); StoredField case_url = new StoredField("case_url",""); StoredField case_point1 = new StoredField("case_point", Integer.parseInt(value.get("qcount"))); SortedNumericDocValuesField case_point = new SortedNumericDocValuesField("case_point",Integer.parseInt(value.get("qcount"))); doc.add(case_title); doc.add(case_id1); doc.add(case_id); doc.add(case_keyword); doc.add(case_contents); doc.add(case_url); doc.add(case_point1); doc.add(case_point); indexWriter.addDocument(doc); //indexWriter.updateDocument(new Term("case_title",ct), doc); } //==================== sql = "select * from tbl_hotq where question is not null and extern = 1"; ac = SpringUtil.getApplicationContext(); dao = (UserDAOImpl)ac.getBean("userDao"); ret = dao.query(sql); for(Iterator<HashMap<String,String>> it=ret.iterator();it.hasNext();){ HashMap<String,String> value=it.next(); doc = new Document(); String ct = value.get("question") == "null" ? "" : value.get("question");//TODO:confirm null "null" TextField case_title = new TextField("case_title",ct, Field.Store.YES); String lid = trimHotQ(ct).toLowerCase(); StringField case_id1 = new StringField("case_id",lid,Field.Store.YES); SortedDocValuesField case_id = new SortedDocValuesField("case_id",new BytesRef(lid)); TextField case_keyword= new TextField("case_keyword","", Field.Store.YES); TextField case_contents = new TextField("case_contents","", Field.Store.YES); StoredField case_url = new StoredField("case_url",value.get("template") == null ? 
"" : value.get("template")); StoredField case_point1 = new StoredField("case_point", Integer.parseInt(value.get("qcount"))); SortedNumericDocValuesField case_point = new SortedNumericDocValuesField("case_point",Integer.parseInt(value.get("qcount"))); doc.add(case_title); doc.add(case_id1); doc.add(case_id); doc.add(case_keyword); doc.add(case_contents); doc.add(case_url); doc.add(case_point1); doc.add(case_point); Term term = new Term("case_id",lid); Query query = new TermQuery(term); indexWriter.deleteDocuments(query); indexWriter.updateDocument(term,doc); } //==================== indexWriter.commit(); indexWriter.close(); } catch (Exception e) { log4j.error(Debug.getFileName() + ":" + Debug.getLineNumber() + ":" + e.toString()); } }
查询
public String doSearch(String[] queryStr,int[] startCount,Operator op,String[] queryStr1,String[] queryStr2,String[] queryStr3,boolean fulldetect) throws Exception { String answer = "[#RQ#]"; HashSet<String> qSet = new HashSet<String>(); if (queryStr[0].isEmpty()) { return answer; } if (queryStr[0].startsWith("?") || queryStr[0].startsWith("*")) { return answer; } ArrayList<QaDetail> srt = new ArrayList<QaDetail>(); int n = startCount[0]; MultiFieldQueryParser qp = null; Query query = null; //兼容对数据库中tbl_case表中内容的搜索 IndexSearcher searcher = new IndexSearcher(reader); TopFieldDocs hits = null; String[] mysqlFields = { "case_title", "case_keyword", "case_contents","case_point","case_id" }; qp = new MultiFieldQueryParser(mysqlFields,analyzer); qp.setDefaultOperator(op); query = qp.parse(queryStr[0]); qSet.add(trimHotQ(queryStr[1])); // String[] keywords = processKeyword(keywordSet,queryStr); //FilteredQuery fq = new FilteredQuery(query, filter); BooleanClause q1 = new BooleanClause(query, BooleanClause.Occur.SHOULD); BooleanClause q2 = null; BooleanClause q3 = null; BooleanClause q4 = null; if(queryStr1!=null) { query = qp.parse(queryStr1[0]); qSet.add(trimHotQ(queryStr1[1])); q2 = new BooleanClause(query, BooleanClause.Occur.SHOULD); } if(queryStr2!=null) { query = qp.parse(queryStr2[0]); qSet.add(trimHotQ(queryStr2[1])); q3 = new BooleanClause(query, BooleanClause.Occur.SHOULD); } if(queryStr3!=null) { query = qp.parse(queryStr3[0]); qSet.add(trimHotQ(queryStr3[1])); q4 = new BooleanClause(query, BooleanClause.Occur.SHOULD); } BooleanQuery booleanQuery = null; if(q2!=null && q3!=null && q4!=null) { booleanQuery = new BooleanQuery.Builder().add(q1).add(q2).add(q3).add(q4).build(); }else if(q2!=null && q3!=null){ booleanQuery = new BooleanQuery.Builder().add(q1).add(q2).add(q3).build(); }else if(q2!=null) { booleanQuery = new BooleanQuery.Builder().add(q1).add(q2).build(); }else { booleanQuery = new BooleanQuery.Builder().add(q1).build(); } final SortField sfield = new 
SortedNumericSortField("case_point", SortField.Type.LONG,true); final SortField iField = new SortField("case_id",SortField.Type.STRING,true); //DuplicateFilter filter = new DuplicateFilter("case_id",DuplicateFilter.KM_USE_FIRST_OCCURRENCE,DuplicateFilter.PM_FULL_VALIDATION); filter hits = searcher.search(booleanQuery,20,new Sort(new SortField[] {sfield,iField}));//iField, ScoreDoc[] sortedHits =hits.scoreDocs; //================================ ApplicationContext ac = SpringUtil.getApplicationContext(); UserDAOImpl dao = (UserDAOImpl)ac.getBean("userDao"); String sql = "select * from tbl_hotq where id=?"; List<HashMap<String,String>> ret = null; QaDetail fullmatch = null; if(fulldetect) { for (String vu : qSet) { ret = dao.query(sql, vu); if(ret.size()==1) { //存在完全匹配的情况 fullmatch = new QaDetail(Integer.parseInt(ret.get(0).get("qcount")),ret.get(0).get("question"),ret.get(0).get("template"),ret.get(0).get("extern").equals("0")?0:1); break; } } } //================================ for (int j = 0; j < sortedHits.length && j < 100; j++) { int docId = sortedHits[j].doc; Document doc = searcher.doc(docId); if (libName.equalsIgnoreCase(doc.get("producttype")) || libName.equalsIgnoreCase("all")) { //tbl_case内容的处理分支 if (doc.get("case_title") != null) { String case_title = doc.get("case_title"); String case_url = doc.get("case_url"); String case_id = doc.get("case_id"); //int qcount = Integer.parseInt(doc.get("case_point")); int qcount = 0; try{ qcount = Integer.parseInt(doc.get("case_point")); }catch(Exception e){ } if(fullmatch!=null) { srt = new ArrayList<QaDetail>(); srt.add(fullmatch); n = 2; break; } if(fulldetect && qSet.contains(case_id))//完全匹配判断 { srt = new ArrayList<QaDetail>(); srt.add(new QaDetail(qcount,case_title,case_url,case_url.equals("")?0:1)); n = 2; break; } srt.add(new QaDetail(qcount,case_title,case_url,case_url.equals("")?0:1)); } n++; if (n > maxSearchResult) { //Collections.sort(srt); for (int c=0;c<srt.size();++c) //Math.min(maxSearchResult,) { 
QaDetail value = srt.get(c); if(0==value.Catelog) { answer += "[FAQ]" + (startCount[0]+c) + ". " + "<a onclick=\"getRelateAnswer('"+value.Question+"');return false;\">" + value.Question + "</a>[/FAQ]"; }else{ answer += "[FAQ]" + (startCount[0]+c) + ". " + "<a href=\"" + value.Url + "\" " + "target=\"_blank\">" + value.Question + "</a>[/FAQ]"; } } startCount[0] = n; return answer += "\n找不到。\n"; } } } //Collections.sort(srt); for (int c=0;c<srt.size();++c) { QaDetail value = srt.get(c); if(0==value.Catelog) { answer += "[FAQ]" + (startCount[0]+c) + ". " + "<a onclick=\"getRelateAnswer('"+value.Question+"');return false;\">" + value.Question + "</a>[/FAQ]"; }else{ answer += "[FAQ]" + (startCount[0]+c) + ". " + "<a href=\"" + value.Url + "\" " + "target=\"_blank\">" + value.Question + "</a>[/FAQ]"; } } startCount[0] = n; return answer; } public static void initCaseIndexReader(String configPath) { File directoryCase = new File(configPath+ "caseIndex"); File[] indexDirCase = directoryCase.listFiles(); int dirLenghtCase = indexDirCase.length; indexReaders = new IndexReader[dirLenghtCase]; for (int i = 0; i < dirLenghtCase; i++) { try { String tempPath = indexDirCase[i].getAbsolutePath(); Directory indexDirectory = FSDirectory.open(Paths.get(tempPath)); indexReaders[i] = DirectoryReader.open(indexDirectory); } catch (IOException e) { e.printStackTrace(); } } try { reader = new MultiReader(indexReaders); }catch(Exception e) { log4j.error(e.getMessage()); } }
更新
public void updateHotCount(String question) { String configPath = this.getClass().getResource("/").getPath().substring(1); List<HashMap<String,String>> ret = null; IndexWriter indexWriter = null; try { //DONE:relace & in patern TODO:test String sql = "update tbl_hotq set qcount=qcount+1 where id = ? and extern=0"; ApplicationContext ac = SpringUtil.getApplicationContext(); UserDAOImpl dao = (UserDAOImpl)ac.getBean("userDao"); dao.update(sql, trimHotQ(question)); // 设置sql语句中的values值 sql = "select * from tbl_hotq where id =? and extern = 0"; ret = dao.query(sql,trimHotQ(question)); Directory directory = null; //更新索引中的 File indexFile = new File(configPath+"caseIndex/mysql"); if (!indexFile.exists()) { indexFile.mkdir(); } directory = FSDirectory.open(Paths.get(configPath+"caseIndex/mysql")); // 分语,使用paoding IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setMaxBufferedDocs(500); config.setOpenMode(IndexWriterConfig.OpenMode.APPEND); indexWriter = new IndexWriter(directory,config);//, type TODO:Conform //indexWriter.MaxFieldLength(5000); Document doc = null; if(ret.size()>0) { HashMap<String,String> value = ret.get(0); doc = new Document(); String ct = value.get("question") == null ? 
"" : value.get("question"); TextField case_title = new TextField("case_title",ct, Field.Store.YES); String lid = trimHotQ(ct).toLowerCase(); StringField case_id1 = new StringField("case_id",lid, Field.Store.YES); SortedDocValuesField case_id = new SortedDocValuesField("case_id",new BytesRef(lid)); TextField case_keyword = new TextField("case_keyword","", Field.Store.YES); TextField case_contents = new TextField("case_contents","", Field.Store.YES); StoredField case_url = new StoredField("case_url",""); StoredField case_point1 = new StoredField("case_point", Integer.parseInt(value.get("qcount"))); SortedNumericDocValuesField case_point = new SortedNumericDocValuesField("case_point",Long.parseLong(value.get("qcount"))); doc.add(case_title); doc.add(case_id1); doc.add(case_id); doc.add(case_keyword); doc.add(case_contents); doc.add(case_url); doc.add(case_point1); doc.add(case_point); //indexWriter.addDocument(doc); Term term = new Term("case_id",lid); Query query = new TermQuery(term); indexWriter.deleteDocuments(query); indexWriter.updateDocument(term,doc); } //==================== indexWriter.commit(); indexWriter.close(); } catch (Exception e) { log4j.error(Debug.getFileName() + ":" + Debug.getLineNumber() + ":" + e.toString()); }finally { if(indexWriter!=null) { try { indexWriter.close(); } catch (Exception e) { log4j.error(e.getMessage()); } } } if(ret!=null && ret.size()>0) { try { reader.close(); }catch (Exception e) { log4j.error(e.getMessage()); } initCaseIndexReader(configPath); } }
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)