简介
- 数据每分钟产生200条,使用mysql储存。
- 目前有数据超过700M。
- 按照日期查询时,按月查询每次会返回超过20万条数据,耗时比较长。
- 计划使用lucene优化查询,不使用es是因为项目较小,没有更充裕的资源。
基本步骤
- 引入依赖。
- 开发工具类。
- 开发索引功能,完成索引。
- 开发定时任务,完成数据增量更新。
- 开发搜索功能,可以搜索数据。
引入依赖
- 修改pom文件
xml
复制代码
<!-- Lucene核心包 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>9.7.0</version>
</dependency>
<!-- Lucene查询解析包 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>9.7.0</version>
</dependency>
- 注:没有使用更多的包是因为这次优化是以long类型区间计算为主,不需要全文索引,所以有基础的包就够了。
工具类
- 实现基本的生成、删除和查询。
java
复制代码
import com.xxx.common.ResponseCode;
import com.xxx.common.exception.SystemException;
import com.xxx.common.util.ValidUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Thin helper around a single on-disk Lucene index: bulk-add documents, wipe the index,
 * and run a caller-supplied query.
 *
 * <p>Every call opens and closes its own {@link Directory}, {@link IndexWriter} or
 * {@link IndexReader}; nothing is cached between calls. Lucene allows only one open
 * {@link IndexWriter} per directory, so {@link #createIndex} and {@link #clean} must not
 * run concurrently with each other.
 */
@Component
@Slf4j
public class LuceneUtil {

    /** Filesystem directory that holds the index files. */
    @Value("${lucene.index.path}")
    private String luceneIndexPath;

    /**
     * Appends one document per list element to the index and commits.
     *
     * @param list    source objects; when empty (per {@code ValidUtil.isEmpty}) nothing is written
     * @param handler converts one source object into a Lucene {@link Document}
     * @throws SystemException wrapping any failure while opening, writing or closing the index
     */
    public <T> void createIndex(List<T> list, CreateDocumentHandler handler) {
        File indexDir = new File(luceneIndexPath);
        if (!indexDir.exists()) {
            // mkdirs (not mkdir) so a nested, not-yet-existing path is created as well.
            indexDir.mkdirs();
        }
        if (ValidUtil.isEmpty(list)) {
            return;
        }
        long startTime = System.currentTimeMillis();
        // try-with-resources closes writer, analyzer and directory (in that order) even on
        // failure, and keeps a close() failure from hiding the original exception.
        try (Directory dir = FSDirectory.open(Paths.get(luceneIndexPath));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            for (T t : list) {
                Document doc = handler.createDocument(t);
                writer.addDocument(doc);
            }
            writer.commit();
        } catch (Exception e) {
            throw new SystemException(ResponseCode.ERROR, e);
        }
        long endTime = System.currentTimeMillis();
        log.info("建立索引耗时" + (endTime - startTime) + "毫秒");
    }

    /**
     * Removes every document from the index. Does nothing when the index directory
     * does not exist.
     *
     * @throws SystemException wrapping any failure while opening, writing or closing the index
     */
    public void clean() {
        File indexDir = new File(luceneIndexPath);
        if (!indexDir.exists()) {
            return;
        }
        long startTime = System.currentTimeMillis();
        try (Directory dir = FSDirectory.open(Paths.get(luceneIndexPath));
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            writer.deleteAll();
            // Explicit commit so the deletion does not depend on the writer's commit-on-close setting.
            writer.commit();
        } catch (Exception e) {
            throw new SystemException(ResponseCode.ERROR, e);
        }
        long endTime = System.currentTimeMillis();
        log.info("清除索引耗时" + (endTime - startTime) + "毫秒");
    }

    /**
     * Runs the caller-supplied query and loads the stored fields of every returned hit.
     *
     * @param handler builds and executes the query against the supplied searcher; it decides
     *                how many hits are returned and in which order
     * @return the stored documents of the hits, in hit order; an empty list when no index
     *         has been written yet
     * @throws SystemException wrapping any failure while opening or reading the index
     */
    public List<Document> search(CreateQueryParamsHandler handler) {
        // Opening an FSDirectory would create a missing folder, so check for it first.
        if (!new File(luceneIndexPath).isDirectory()) {
            return new ArrayList<>();
        }
        try (Directory dir = FSDirectory.open(Paths.get(luceneIndexPath))) {
            // Ask Lucene whether a committed index is present instead of probing for the
            // write.lock file, which is an implementation detail of the writer.
            if (!DirectoryReader.indexExists(dir)) {
                return new ArrayList<>();
            }
            try (IndexReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                long startTime = System.currentTimeMillis();
                TopDocs docs = handler.handler(searcher);
                long endTime = System.currentTimeMillis();
                log.info("索引查询耗时" + (endTime - startTime) + "毫秒");
                // Size by the hits actually returned: totalHits counts every match, which can
                // be far larger than the requested page and may not even fit in an int.
                List<Document> result = new ArrayList<>(docs.scoreDocs.length);
                for (ScoreDoc scoreDoc : docs.scoreDocs) {
                    result.add(searcher.doc(scoreDoc.doc));
                }
                return result;
            }
        } catch (Exception e) {
            throw new SystemException(ResponseCode.ERROR, e);
        }
    }
}
生成索引功能
java
复制代码
/**
 * Indexes data one day at a time, starting at {@code startDate}.
 *
 * <p>For each one-day window the rows are loaded through {@code queryLuceneList} and handed to
 * {@code luceneUtil.createIndex}. When a window is empty, {@code getOne} is asked whether any
 * row exists from the window start onwards; if not, the loop ends.
 *
 * @param startDate beginning of the first one-day window
 */
public void index(Date startDate) {
    log.info("start index! Date : " + DateUtil.format(DateUtil.now()));
    Date windowStart = startDate;
    while (true) {
        Date windowEnd = DateUtil.datePlusDays(windowStart, 1);
        CurrencyDataForm windowForm = CurrencyDataForm.builder()
                .createTimeBegin(windowStart.getTime())
                .createTimeEnd(windowEnd.getTime())
                .build();
        List<CurrencyData> rows = currencyDataMapper.queryLuceneList(windowForm);
        String windowStartText = DateUtil.format(windowStart);
        String windowEndText = DateUtil.format(windowEnd);
        log.info(String.format("index startDate = %s, endDate = %s, size = %s", windowStartText, windowEndText, rows.size()));
        if (rows.isEmpty()) {
            // Empty window: stop only when nothing at all is left from this window onwards.
            List<CurrencyData> remaining = currencyDataMapper.getOne(
                    CurrencyDataForm.builder().createTimeBegin(windowStart.getTime()).build());
            boolean nothingLeft = remaining.isEmpty();
            log.info("has more begin:" + DateUtil.format(windowEnd) + ", result: " + (nothingLeft ? "no" : "yes"));
            if (nothingLeft) {
                break;
            }
        }
        luceneUtil.createIndex(rows, (CreateDocumentHandler<Data>) item -> {
            String valueText = ValidUtil.isEmpty(item.getValue()) ? "" : item.getValue();
            String unitText = ValidUtil.isEmpty(item.getUnit()) ? "" : item.getUnit();
            Document document = new Document();
            document.add(new TextField("dId", item.getDId(), Field.Store.YES));
            document.add(new TextField("typeId", item.getTypeId(), Field.Store.YES));
            // The same timestamp is added three times: as a point for range queries,
            // as a stored value for retrieval, and as doc values for sorting.
            document.add(new LongPoint("createTime", item.getCreateTime()));
            document.add(new StoredField("createTime", item.getCreateTime()));
            document.add(new NumericDocValuesField("sortTime", item.getCreateTime()));
            // Empty value/unit are replaced by "" before being indexed.
            document.add(new TextField("value", valueText, Field.Store.YES));
            document.add(new TextField("unit", unitText, Field.Store.YES));
            return document;
        });
        windowStart = windowEnd;
    }
    log.info("finish index!");
}
- 注:每次生成1天的索引;如果本轮没有数据,并且从本轮起始时间往后也查不到任何数据,则结束索引。
定时任务
java
复制代码
private ThreadPoolTaskExecutor tpe;
// Run one indexing pass on the pool, then arrange for the next pass 60 seconds after it ends.
tpe.execute(() -> {
    Date startDate;
    try {
        startDate = getLastDate();
    } catch (SystemException s) {
        // No usable resume point (presumably the index is missing or unreadable — confirm
        // against getLastDate): wipe the index and rebuild from the configured initial time.
        luceneUtil.clean();
        startDate = DateUtil.parse(initStartTime);
    }
    try {
        index(startDate);
    } catch (Exception e) {
        // A failed pass is an error, not routine information; log it at ERROR with the stack trace.
        log.error("生成索引异常。", e);
    } finally {
        // Schedule the next pass whether or not this one succeeded.
        // NOTE(review): this::init is assumed to be the method that submits this task — confirm.
        ScheduledExecutorService executor = Executors.newScheduledThreadPool(1);
        executor.schedule(this::init, 60, TimeUnit.SECONDS);
        // shutdown() only rejects new submissions; with the JDK's default policy the already
        // scheduled delayed task still runs, after which the single worker thread exits.
        executor.shutdown();
    }
});
- 注:使用线程池+延时任务,每次索引任务结束后延迟60s再次执行,从而实现约每60s一次的增量更新。
搜索
java
复制代码
/**
 * Queries the Lucene index with the optional filters carried by {@code form} and maps the
 * hits back to {@code Data} objects.
 *
 * <p>Filters are combined with AND: device id, type id, and a creation-time range that is
 * applied only when both begin and end are present. Hits are sorted ascending on the
 * {@code sortTime} doc-values field and limited to {@code form.getSize()}.
 *
 * @param form query filters and page size
 * @return matching rows in ascending time order; empty when nothing matches
 */
public List<Data> queryIndex(Form form) {
    List<Document> documentList = luceneUtil.search((searcher) -> {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        if (ValidUtil.isNotEmpty(form.getDId())) {
            TermQuery deviceIdQuery = new TermQuery(new Term("dId", form.getDId()));
            builder.add(deviceIdQuery, BooleanClause.Occur.MUST);
        }
        if (ValidUtil.isNotEmpty(form.getTypeId())) {
            TermQuery typeQuery = new TermQuery(new Term("typeId", form.getTypeId()));
            // Add the type clause itself; the device-id query is not in scope here.
            builder.add(typeQuery, BooleanClause.Occur.MUST);
        }
        if (ValidUtil.isNotEmpty(form.getBegin()) && ValidUtil.isNotEmpty(form.getEnd())) {
            // The field name must match the LongPoint written at index time ("createTime").
            Query timeQuery = LongPoint.newRangeQuery("createTime", form.getBegin().getTime(), form.getEnd().getTime());
            builder.add(timeQuery, BooleanClause.Occur.MUST);
        }
        Sort sort = new Sort(new SortField("sortTime", SortField.Type.LONG, false));
        return searcher.search(builder.build(), form.getSize(), sort);
    });
    List<Data> result = new ArrayList<>(documentList.size());
    for (Document document : documentList) {
        Data data = new Data();
        data.setTypeId(Integer.valueOf(document.get("typeId")));
        data.setDId(Integer.valueOf(document.get("dId")));
        // The timestamp is stored under "createTime"; reading "time" would return null.
        data.setTime(document.getField("createTime").numericValue().longValue());
        data.setValue(document.get("value"));
        data.setUnit(document.get("unit"));
        result.add(data);
    }
    return result;
}