需求背景:解析一个CSV文件(大约15万条数据),获取CSV文件的表格数据,并保存到数据库中(这里选择sqlite数据库,当然也可以选择其他数据库)
思路:1、查看CSV文件的表头,方便解析
2、使用代码打开这个csv文件,循环解析文件内容
3、 批量分次保存到数据库中
具体实现如下:
- 添加对应的依赖
XML
<!--1-sqlite相关-->
<!-- https://mvnrepository.com/artifact/org.xerial/sqlite-jdbc -->
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.42.0.0</version>
</dependency>
<!--2-sqlite 方言-->
<dependency>
<groupId>com.zsoltfabok</groupId>
<artifactId>sqlite-dialect</artifactId>
<version>1.0</version>
</dependency>
<!-- 对数据库操作的统计 -->
<dependency>
<groupId>p6spy</groupId>
<artifactId>p6spy</artifactId>
<version>3.9.1</version>
</dependency>
<!-- Apache Commons CSV (可选) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.8</version> <!-- 根据需要调整版本 -->
</dependency>
<!-- io常用工具类 -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
</dependency>
<!-- 文件上传工具类 -->
<dependency>
<groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId>
<version>1.5</version>
</dependency>
- 业务代码逻辑实现
java
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;
/**
* CSV 到 SQLite 实用程序
*
* @author xjs
*/
public class CsvToSQLiteUtils {
public static void main(String[] args) {
// CSV文件路径 CSV 文件的表头为:"雨量站", "降雨量", "经度", "纬度", "时间"
String csvFilePath = "D:\\home\\files\\rainGaugeStationContainsCoordinates.csv";
// SQLite数据库文件路径
String DATABASE_URL = "jdbc:sqlite:D:\\home\\db\\rainfall.db";
// 创建表名
Timestamp timestamp = new Timestamp(System.currentTimeMillis());
String tableName = "rainfall_" + timestamp.getTime();
System.out.println("tableName = " + tableName);
// 读取 CSV 文件并保存到 SQLite 数据库中
readCsvSaveLocal(csvFilePath, tableName, DATABASE_URL);
System.out.println("CSV文件读取完成并保存到SQLite数据库中。");
}
/**
* 读取 CSV 保存本地
*
* @param stationRainFile Station Rain 文件
* @param tableName 表名称
*/
public static void readCsvSaveLocal(String stationRainFile, String tableName, String databaseUrl) {
// 1. 创建SQLite数据库连接
try (Connection conn = DriverManager.getConnection(databaseUrl)) {
if (conn != null) {
// 2. 创建表(如果不存在)
createTableIfNotExists(conn, tableName);
// 3. 设置SQLite连接的字符编码为UTF-8
try (Statement stmt = conn.createStatement()) {
// 设置SQLite编码为UTF-8
stmt.execute("PRAGMA encoding = 'UTF-8';");
}
// 4. 开始事务
conn.setAutoCommit(false);
// 5. 读取CSV文件并插入数据
readCsvAndInsertDatas(stationRainFile, conn, tableName);
// 6. 提交事务
conn.commit();
System.out.println("数据成功插入到SQLite数据库!");
}
} catch (SQLException e) {
System.err.println("SQLite连接失败: " + e.getMessage());
}
}
/**
* 创建表,如果不存在的话
*/
private static void createTableIfNotExists(Connection conn, String tableName) throws SQLException {
try (Statement stmt = conn.createStatement()) {
String CREATE_TABLE_SQL =
"CREATE TABLE IF NOT EXISTS " + tableName + " (" +
" id INTEGER PRIMARY KEY AUTOINCREMENT," +
" station_name TEXT NOT NULL," +
" rainfall REAL NOT NULL," +
" longitude REAL NOT NULL," +
" latitude REAL NOT NULL," +
" datetime TEXT NOT NULL" +
");";
stmt.execute(CREATE_TABLE_SQL);
}
}
/**
* 读取 CSV 并插入数据
*
* @param csvFilePath CSV 文件路径
* @param conn 控制室
* @param tableName 表名称
*/
private static void readCsvAndInsertDatas(String csvFilePath, Connection conn, String tableName) {
// 使用 Apache Commons CSV 读取CSV文件
try (Reader reader = new InputStreamReader(Files.newInputStream(Paths.get(csvFilePath)), "GBK")) {
Iterable<CSVRecord> records = CSVFormat.DEFAULT
.withHeader("雨量站", "降雨量", "经度", "纬度", "时间")
.withSkipHeaderRecord() // 跳过表头
.parse(reader);
String insertDataSql = "INSERT INTO " + tableName + " (station_name, rainfall, longitude, latitude, datetime) VALUES (?, ?, ?, ?, ?);";
try (PreparedStatement pstmt = conn.prepareStatement(insertDataSql)) {
// 批量大小
int batchSize = 1000;
int count = 0;
for (CSVRecord record : records) {
// 获取每一列的数据
String stationName = record.get("雨量站");
double rainfall = Double.parseDouble(record.get("降雨量"));
double longitude = Double.parseDouble(record.get("经度"));
double latitude = Double.parseDouble(record.get("纬度"));
String datetime = record.get("时间");
// 设置插入数据的参数
pstmt.setString(1, stationName);
pstmt.setDouble(2, rainfall);
pstmt.setDouble(3, longitude);
pstmt.setDouble(4, latitude);
pstmt.setString(5, datetime);
// 添加到批处理
pstmt.addBatch();
count++;
// 每batchSize条数据执行一次批量插入
if (count % batchSize == 0) {
pstmt.executeBatch();
}
}
// 执行剩余的批量插入
pstmt.executeBatch();
} catch (SQLException e) {
System.err.println("批量插入数据时出错: " + e.getMessage());
}
} catch (IOException e) {
System.err.println("读取CSV或插入数据时出错: " + e.getMessage());
}
}
/**
* 计算总降雨量
*/
public static Double getTotal(String tableName) {
// 1. 创建SQLite数据库连接
try (Connection conn = DriverManager.getConnection("jdbc:sqlite:rainfall.db")) {
if (conn != null) {
// 2. 执行SQL查询
String queryDataSql = "select sum(rainfall_difference) as total from(SELECT ABS( (SELECT rainfall FROM " + tableName + " WHERE station_name = s.station_name ORDER BY datetime ASC LIMIT 1) - (SELECT rainfall FROM " + tableName + " WHERE station_name = s.station_name ORDER BY datetime DESC LIMIT 1)) AS rainfall_difference FROM " + tableName + " s GROUP BY station_name)";
// 3. 处理查询结果
try (PreparedStatement pstmt = conn.prepareStatement(queryDataSql)) {
ResultSet rs = pstmt.executeQuery();
while (rs.next()) {
// 获取总和
return rs.getDouble("total");
}
} catch (SQLException e) {
System.err.println("查询数据时出错: " + e.getMessage());
}
}
} catch (SQLException e) {
System.err.println("SQLite连接失败: " + e.getMessage());
}
return null;
}
至此,就可以把CSV文件的内容保存到sqlite数据库中了!