研究lucene几天的一点心得
|
cy729215495
cy729215495
普通会员
|
1 # 大 中 小 发表于 2008-05-03 15:27:31
首先感谢老紫竹同志给我一个机会让我正式地第一次发帖(呵呵,我在csdn上经常潜水,不是我不想回答,而是别人回答得已经很好了),快毕业了,老师分任务的时候让我做网站的搜索引擎,当时只听了一个单词叫lucene,只知道要用这个技术,得,回来就开始研究,找资料,问了好多人,都说没整过这玩意,五一节就是在这种郁闷中过的,后来跟老紫竹同志交谈,敢情人家都不用lucene,用数据库自带的,不知道自己是不是白研究了,不过感觉研究这个东西还是很光荣的,好多人都不知道(小人得志),其实我的想法是面试的时候就跟别人说我会lucene,希望能增加别人的好感,说实话,不知道自己达到什么水平才能在外面很顺利地找到事情,也不知道该去哪个地方闯,我现在是在武汉,反正就是不想留在这,说了很多废话,希望路过的大哥大姐们给我点意见,好了,我就开始说lucene 吧;
数据库users表 id integer primary key name varchar(50) --------------------- java代码 连接类 package test.lucene;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class DBAccess {
/**
* @param args
*/
private static Connection conn = null;
private static DBAccess db = null;
private DBAccess() throws ClassNotFoundException, SQLException {
this("root", "admin");
}
private DBAccess(String userName, String pwd) throws ClassNotFoundException, SQLException {
Class.forName("com.mysql.jdbc.Driver");
conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", userName, pwd);
}
/**
 * Returns the shared JDBC connection, opening it on first use and re-opening
 * it when the previously handed-out connection has been closed (for example
 * through {@link #close(ResultSet, PreparedStatement, Statement, Connection)}).
 *
 * @return the shared open connection, or {@code null} when the driver could
 *         not be loaded or the connection could not be established (the
 *         failure is printed to standard error)
 */
public static Connection getConnection() {
    try {
        if (db == null || conn == null || conn.isClosed()) {
            // Drop the stale reference first so that a failed reconnect
            // yields null instead of an already-closed connection.
            conn = null;
            db = new DBAccess();
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    } catch (SQLException e) {
        e.printStackTrace();
    }
    return conn;
}
/**
 * Closes the given JDBC resources in the order result set, prepared
 * statement, statement, connection. Every argument may be {@code null}.
 * Each resource is closed independently, so a failure while closing one of
 * them does not prevent the remaining ones from being closed; failures are
 * printed to standard error.
 *
 * @param rs result set to close, or {@code null}
 * @param ps prepared statement to close, or {@code null}
 * @param st statement to close, or {@code null}
 * @param conn connection to close, or {@code null}
 */
public static void close(ResultSet rs, PreparedStatement ps, Statement st, Connection conn) {
    try {
        if (rs != null) {
            rs.close();
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
    closeStatement(ps);
    closeStatement(st);
    try {
        if (conn != null) {
            conn.close();
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}

/** Closes one statement (plain or prepared), printing any failure instead of propagating it. */
private static void closeStatement(Statement statement) {
    if (statement == null) {
        return;
    }
    try {
        statement.close();
    } catch (SQLException e) {
        e.printStackTrace();
    }
}
/**
 * Manual smoke test: obtains the shared connection and prints it.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    Connection shared = DBAccess.getConnection();
    System.out.println(shared);
}
}测试类package test.lucene;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.Timer;
import java.util.TimerTask;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.FSDirectory;
public final class LuceneTest {
final String path = "d:/index/";// 索引文件的存放物理位置
/**
 * Runs the given SQL query on the shared {@code DBAccess} connection.
 *
 * NOTE(review): the statement created here is never closed by this method;
 * presumably it has to stay open while the caller reads the result set.
 *
 * @param sql the query text to execute
 * @return the open result set, or {@code null} when the query fails with an
 *         {@code SQLException} (the stack trace is printed)
 */
public synchronized ResultSet getResult(String sql) {
    final Connection connection = DBAccess.getConnection();
    try {
        return connection.createStatement().executeQuery(sql);
    } catch (SQLException failure) {
        failure.printStackTrace();
        return null;
    }
}
/**
 * Rebuilds the Lucene index under {@code path} from scratch, adding one
 * document per row of the given result set.
 *
 * Each document gets an {@code id} keyword field and a {@code name} text
 * field. The writer is optimized and closed before the directory, and both
 * are released even when reading a row or writing a document fails.
 * I/O and SQL failures are printed to standard output.
 *
 * @param rs rows of the {@code users} table; it is iterated to the end but
 *           not closed here
 */
public synchronized void Index(ResultSet rs) {
    FSDirectory fsDir = null;
    IndexWriter writer = null;
    try {
        // "true" in both calls: wipe and recreate the index on every run.
        fsDir = FSDirectory.getDirectory(path, true);
        writer = new IndexWriter(fsDir, getAnalyzer(), true);
        Date start = new Date();
        while (rs.next()) {
            // One document corresponds to one table row.
            Document doc = new Document();
            doc.add(Field.Keyword("id", rs.getString("id")));
            // The users table is described with the columns "id" and "name";
            // there is no "userName" column to read from.
            doc.add(Field.Text("name", rs.getString("name")));
            writer.addDocument(doc);
        }
        writer.optimize();
        // Closing the writer is what flushes the buffered documents to disk.
        writer.close();
        writer = null;
        Date end = new Date();
        System.out.println("索引建立成功!!!!" + "用时" + (end.getTime() - start.getTime()) + "毫秒");
    } catch (IOException e) {
        System.out.println(e);
    } catch (SQLException e) {
        System.out.println(e);
    } finally {
        // Only reached with a non-null writer when the happy path was cut short.
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            System.out.println(e);
        }
        // The directory is released last, after the writer that uses it.
        try {
            if (fsDir != null) {
                fsDir.close();
            }
        } catch (Exception e) {
            System.out.println(e);
        }
    }
}
/**
 * Supplies the analyzer used when building the index.
 *
 * @return a new {@code StandardAnalyzer} on every call
 */
public synchronized Analyzer getAnalyzer() {
    Analyzer analyzer = new StandardAnalyzer();
    return analyzer;
}
/**
 * Searches the {@code name} field of the index under {@code path} for
 * documents containing the given text anywhere in a term
 * (wildcard query {@code *text*}).
 *
 * The searcher is intentionally left open: the returned {@code Hits} loads
 * its documents lazily through the searcher, so closing it here would make
 * the caller's {@code hits.doc(i)} calls read from a closed index.
 * NOTE(review): this leaves one open searcher per call; returning copied
 * documents instead of {@code Hits} would allow a deterministic close but
 * changes the method's return type.
 *
 * @param queryString text to look for
 * @return the hits, or {@code null} when the search fails (the exception is
 *         printed to standard output)
 */
public Hits seacher(String queryString) {
    Hits hits = null;
    try {
        Query query = new WildcardQuery(new Term("name", "*" + queryString + "*"));
        // Read from the same location the indexer writes to.
        Searcher searcher = new IndexSearcher(path);
        hits = searcher.search(query);
    } catch (Exception e) {
        System.out.print(e);
    }
    return hits;
}
/**
 * Starts a timer that rebuilds the whole index immediately and then once a
 * minute, so that the index follows changes in the database.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final Timer timer = new Timer();
    final TimerTask rebuildIndex = new TimerTask() {
        public synchronized void run() {
            LuceneTest indexer = new LuceneTest();
            indexer.Index(indexer.getResult("select * from users"));
        }
    };
    final int periodMillis = 60 * 1000;
    timer.schedule(rebuildIndex, 0, periodMillis);
}
}页面部分<%@ page language="java" import="java.util.*" pageEncoding="gbk"%>
<%@ page import="test.lucene.LuceneTest"%>
<%@ page import="org.apache.lucene.search.*,org.apache.lucene.document.*" %>
<%
// Build the absolute base URL of this web application
// (scheme://host:port/context/) for the <base> tag below.
String path = request.getContextPath();
StringBuffer baseUrl = new StringBuffer();
baseUrl.append(request.getScheme()).append("://");
baseUrl.append(request.getServerName()).append(":").append(request.getServerPort());
baseUrl.append(path).append("/");
String basePath = baseUrl.toString();
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>helloworld lucene </title>
</head>
<body>
<%
// Run the search only when the form was submitted; hits stays null otherwise
// and the result table below is left empty.
LuceneTest lucent=new LuceneTest();
Hits hits=null;
String rawQuery=request.getParameter("queryString");
if(rawQuery!=null){
    // The parameter bytes are recovered as iso-8859-1 and re-decoded with the
    // page's own encoding (gbk); gb2312 is only a subset of gbk and garbles
    // characters outside it.
    hits =lucent.seacher(new String(rawQuery.getBytes("iso-8859-1"),"gbk"));
} %> <br>
<form action="TestLucene.jsp">
<input type="text" name="queryString"/>
<input type="submit" value="搜索"/>
</form>
<%-- Result table: header and rows are rendered only when hits is non-null. --%>
<table>
<%if(hits!=null){%>
<tr>
<td>作者号</td>
<td>作者名</td>
</tr>
<% for(int i=0;i<hits.length();i++){
// One table row per hit, showing the stored "id" and "name" fields.
// NOTE(review): the field values are written without HTML escaping - confirm
// that the indexed names cannot contain markup.
Document doc=hits.doc(i);
%>
<tr>
<td><%=doc.get("id") %></td>
<td><%=doc.get("name") %></td>
</tr>
<% }}%>
</table>
</body>
</html>不过我有个问题,就是document的数据源是直接用结果集里面的数据,还是把结果集的数据解析为xml后再从xml中读取;
欢迎大家和我讨论,我qq:729215495 |
|||||
|
|
hpyouyou
hpyouyou
普通会员
|
2 # 大 中 小 发表于 2008-05-05 17:58:54
感觉lz应该先较详细的讲述下lucene是什么东东 然后再举例子 那样就容易理解了
说实话,光看到一堆代码, 就看到里面调用了那个lucene的类 最后还没明白这个lucene是什么 到底有什么好处~ |
|||||
|
|
chen4765654
chen4765654
普通会员
|
3 # 大 中 小 发表于 2008-07-05 09:26:29
其实简单全文检索,建个数据库表就可以实现了
id key relationid 字段key保存关键字用于检索,很多数据库也自带全文检索功能,但是数据库自带分词不是很好,很多时候还是得自己搞定。 下面是我自己写得一个通用lucene类,indexwriter和indexreader都使用单例,提高性能
/**
 * Convenience wrapper around a single Lucene index directory.
 *
 * One {@code IndexWriter} and one {@code IndexReader} are created lazily and
 * then reused for every call. Searches parse a query string with a
 * whitespace-based {@code QueryParser} whose default field is {@code id} and
 * return the matching documents sorted by score (descending), with later
 * indexed documents first among equal scores.
 */
public class LucenTemplate {
    private static Log log = LogFactory.getLog(LucenTemplate.class);

    /** Guards the shared query parser. */
    private final Object lock = new Object();
    private IndexWriter indexwriter;
    private IndexReader indexreader;
    private String dirPath;
    private Analyzer analyzer;
    // Query strings are split on whitespace only - keep this in mind when building them.
    private WhitespaceAnalyzer seachAnalyzer;
    private QueryParser queryparser;
    // Score descending; for equal scores the document indexed later comes first.
    private Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE, new SortField(null, SortField.DOC, true) });

    /**
     * Creates the template and tries to open writer and reader right away.
     * An initialisation failure is only logged; the writer and reader are
     * then opened on first use instead.
     *
     * @param indexPath path of the index directory
     */
    LucenTemplate(String indexPath) {
        dirPath = indexPath;
        analyzer = new MIK_CAnalyzer();
        seachAnalyzer = new WhitespaceAnalyzer();
        queryparser = new QueryParser("id", seachAnalyzer);
        try {
            initIndexWriter();
            initIndexReader();
        } catch (IOException e) {
            log.error("索引初始化失败", e);
        }
    }

    /**
     * Opens the shared writer if it is not open yet.
     *
     * @throws IOException if the index directory is missing or invalid, is
     *         locked, or the writer cannot be created
     */
    public synchronized void initIndexWriter() throws IOException {
        if (indexwriter == null) {
            File dir = requireIndexDirectory();
            if (IndexReader.isLocked(dirPath))
                throw new IOException("索引目录已被锁定,无法初始化");
            indexwriter = new IndexWriter(dir, analyzer);
            // Merges the segment files into a single .cfs file, which reduces the
            // number of index files and of simultaneously open file handles.
            indexwriter.setUseCompoundFile(true);
        }
    }

    /**
     * Opens the shared reader if it is not open yet.
     *
     * @throws IOException if the index directory is missing or invalid, or
     *         the reader cannot be opened
     */
    public synchronized void initIndexReader() throws IOException {
        if (indexreader == null) {
            indexreader = IndexReader.open(requireIndexDirectory());
        }
    }

    /**
     * Replaces the shared reader with a reopened one so that later searches
     * see index changes; the old reader is closed when a new instance was
     * returned.
     *
     * @throws IOException if the reader cannot be opened or reopened
     */
    public void refreashIndexReader() throws IOException {
        // Going through the getter opens the reader first if start-up initialisation failed.
        IndexReader current = getIndexReader();
        IndexReader temp = current.reopen();
        if (temp != current)
            current.close();
        indexreader = temp;
    }

    /**
     * @return the shared writer, opened on demand
     * @throws IOException if the writer has to be opened and that fails
     */
    public IndexWriter getIndexWriter() throws IOException {
        if (indexwriter == null)
            initIndexWriter();
        return indexwriter;
    }

    /**
     * @return the shared reader, opened on demand
     * @throws IOException if the reader has to be opened and that fails
     */
    public IndexReader getIndexReader() throws IOException {
        if (indexreader == null)
            initIndexReader();
        return indexreader;
    }

    /**
     * Creates a searcher on top of the shared reader. The caller must close
     * the returned searcher once it is done with it.
     *
     * @return a new searcher
     * @throws IOException if the shared reader cannot be opened
     */
    public IndexSearcher getIndexSearcher() throws IOException {
        IndexReader reader = getIndexReader();
        IndexSearcher searcher = new IndexSearcher(reader);
        return searcher;
    }

    /**
     * Optimizes the index; a failure is logged, not thrown.
     * NOTE(review): the original comment says write operations need
     * synchronisation, but no locking is done here - confirm whether callers
     * serialise their writes.
     */
    public void optimize() {
        try {
            getIndexWriter().optimize();
        } catch (IOException e) {
            log.error("索引优化失败", e);
        }
    }

    // ///////////////////////////////////////////
    // Frequently used business methods

    /**
     * Adds one document to the index.
     *
     * @param doc the document to add
     * @throws LuceenAccessException if the writer cannot be opened or the
     *         document cannot be written
     */
    public void addDocument(Document doc) throws LuceenAccessException {
        try {
            getIndexWriter().addDocument(doc);
        } catch (IOException e) {
            throw new LuceenAccessException(e);
        }
    }

    /**
     * Searches with several {@code key:value} conditions combined by
     * whitespace (an OR query with the default parser settings).
     *
     * @param fields list of {@link SearchField}
     * @return all matching documents
     * @throws LuceenAccessException if parsing or searching fails
     */
    public List searchDocumentByOR(List fields) throws LuceenAccessException {
        return searchDocument(buildQueryString(fields));
    }

    /**
     * Runs a caller-supplied query string; meant for complex queries.
     * Every hit is loaded, so prefer the paged variants for large results.
     *
     * @param queryStr query in query-parser syntax
     * @return all matching documents, each with its boost set to the hit score
     * @throws LuceenAccessException if parsing or searching fails
     */
    public List searchDocument(String queryStr) throws LuceenAccessException {
        return runQuery(queryStr, 0, Integer.MAX_VALUE);
    }

    /**
     * Returns the hits with positions {@code n} to {@code m} (both inclusive,
     * counting from 0). When fewer than {@code m + 1} hits exist, only the
     * available ones are returned.
     *
     * @param queryStr query in query-parser syntax
     * @param n position of the first hit to return
     * @param m position of the last hit to return
     * @return the selected documents, each with its boost set to the hit score
     * @throws IllegalArgumentException if {@code n > m} or either is negative
     * @throws LuceenAccessException if parsing or searching fails
     */
    public List searchDocumentNToMByQueryStr(String queryStr, int n, int m) throws LuceenAccessException {
        if (n > m || n < 0 || m < 0)
            throw new IllegalArgumentException("N或者M参数无效");
        return runQuery(queryStr, n, m);
    }

    /**
     * Same as {@link #searchDocumentNToMByQueryStr(String, int, int)} but
     * with the query built from {@code key:value} conditions.
     *
     * @param fields list of {@link SearchField}
     * @param n position of the first hit to return, counting from 0
     * @param m position of the last hit to return
     * @return the selected documents
     * @throws LuceenAccessException if parsing or searching fails
     */
    public List searchDocumentNToM(List fields, int n, int m) throws LuceenAccessException {
        return searchDocumentNToMByQueryStr(buildQueryString(fields), n, m);
    }

    /**
     * Returns one page of results.
     *
     * @param fields list of {@link SearchField}
     * @param pageSize number of hits per page, at least 1
     * @param page page number, starting at 1
     * @return the documents of that page (possibly fewer than {@code pageSize})
     * @throws IllegalArgumentException if {@code pageSize} or {@code page} is below 1
     * @throws LuceenAccessException if parsing or searching fails
     */
    public List searchDocumentPaged(List fields, int pageSize, int page) throws LuceenAccessException {
        if (pageSize < 1 || page < 1)
            throw new IllegalArgumentException("pageSize或者page参数无效");
        int n = pageSize * (page - 1);
        int m = n + pageSize - 1;
        return searchDocumentNToM(fields, n, m);
    }

    /**
     * Closes writer and reader. Each is closed independently so that a
     * failure on the writer does not leave the reader open; failures are
     * logged.
     */
    public void close() {
        try {
            if (indexwriter != null)
                indexwriter.close();
        } catch (IOException e) {
            log.error("关闭writer和reader失败", e);
        }
        try {
            if (indexreader != null)
                indexreader.close();
        } catch (IOException e) {
            log.error("关闭writer和reader失败", e);
        }
    }

    public String getDirPath() {
        return dirPath;
    }

    public void setDirPath(String dirPath) {
        this.dirPath = dirPath;
    }

    /** Returns the index directory, rejecting a path that is not an existing directory. */
    private File requireIndexDirectory() throws FileNotFoundException {
        File dir = new File(dirPath);
        // isDirectory() is already false for a path that does not exist.
        if (!dir.isDirectory())
            throw new FileNotFoundException("索引目录不存在或者无效");
        return dir;
    }

    /** Joins the given SearchField entries into "key:value key:value " form. */
    private String buildQueryString(List fields) {
        StringBuffer queryStr = new StringBuffer();
        for (Iterator iter = fields.iterator(); iter.hasNext();) {
            SearchField field = (SearchField) iter.next();
            queryStr.append(field.getKey()).append(":").append(field.getValue()).append(' ');
        }
        return queryStr.toString();
    }

    /** Parses and runs the query, collecting the hits at positions first..last that actually exist. */
    private List runQuery(String queryStr, int first, int last) throws LuceenAccessException {
        List result = new ArrayList();
        Query query = null;
        IndexSearcher searcher = null;
        try {
            synchronized (lock) {
                // The shared query parser must not be used by several threads at once.
                query = queryparser.parse(queryStr);
            }
            searcher = getIndexSearcher();
            Hits hits = searcher.search(query, sort);
            // Never ask for a hit position beyond the last available one.
            int end = Math.min(last, hits.length() - 1);
            for (int i = first; i <= end; i++) {
                Document document = hits.doc(i);
                // The boost is reused to hand the hit score to the caller.
                document.setBoost(hits.score(i));
                result.add(document);
            }
            return result;
        } catch (ParseException e) {
            throw new LuceenAccessException(e);
        } catch (IOException e) {
            throw new LuceenAccessException(e);
        } finally {
            // The searcher is still null when parsing or opening failed.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    throw new LuceenAccessException("IndexSearch关闭失败", e);
                }
            }
        }
    }

    /** One {@code key:value} search condition. */
    public static class SearchField {
        private String key;
        private String value;

        SearchField(String key, String value) {
            this.key = key;
            this.value = value;
        }

        public String getKey() {
            return key;
        }

        public void setKey(String key) {
            this.key = key;
        }

        public String getValue() {
            return value;
        }

        public void setValue(String value) {
            this.value = value;
        }
    }
} |
|||||
|


