
在日常开发中,如果要操作数据库的话,或多或少都会遇到批量数据的处理。我们公司使用 mybatis-plus 作为持久层的框架,今天就简单介绍一下 mybatis 批量操作数据库的几种方式。
1.循环插入
其实这种方式并不是批量插入,只是在日常开发中,使用这种方式的还是比较多的。
            
            
              java
              
              
            
          
          @RunWith(SpringRunner.class)
@SpringBootTest
public class BatchTest {
    @Resource
    private StudentMapper studentMapper;
    /**
     * Baseline measurement: inserts 100 generated students one at a time through
     * {@code studentMapper.insert} and prints the elapsed wall-clock time in milliseconds.
     */
    @Test
    public void test(){
        List<Student> rows = generateStudentData(100);
        long begin = System.currentTimeMillis();
        // One mapper call per row: this is a plain loop, not a real batch insert.
        for (Student row : rows) {
            studentMapper.insert(row);
        }
        long elapsedMillis = System.currentTimeMillis() - begin;
        System.out.println("循环插入: " + rows.size() + " 条数据,共计耗时:" + elapsedMillis + " 毫秒");
    }
    /**
     * Builds {@code size} sample students for the insert benchmarks.
     * Names run from {@code zhangsan-0} through {@code zhangsan-(size-1)}, age is a random
     * int in [0, 40), the phone number is fixed and birthday is the moment of creation.
     *
     * @param size number of students to generate
     * @return a new list holding the generated students in creation order
     */
    public static List<Student> generateStudentData(int size){
        List<Student> generated = new ArrayList<>(size);
        Random ageSource = new Random();
        int index = 0;
        while (index < size) {
            Student current = new Student();
            current.setName("zhangsan-" + index);
            current.setAge(ageSource.nextInt(40));
            current.setPhone("15077828899");
            current.setBirthday(LocalDateTime.now());
            generated.add(current);
            index++;
        }
        return generated;
    }
}
2.foreach方式插入
            
            
              java
              
              
            
          
          @RunWith(SpringRunner.class)
@SpringBootTest
public class BatchTest {
    @Resource
    private StudentMapper studentMapper;
    /**
     * Measures the foreach approach: hands all 100 generated students to
     * {@code studentMapper.insertBatch} in a single call and prints the elapsed milliseconds.
     */
    @Test
    public void test3(){
        List<Student> rows = generateStudentData(100);
        long begin = System.currentTimeMillis();
        // The whole list goes to the mapper in one call.
        studentMapper.insertBatch(rows);
        long elapsedMillis = System.currentTimeMillis() - begin;
        System.out.println("foreach插入: " + rows.size() + " 条数据,共计耗时:" + elapsedMillis + " 毫秒");
    }
    /**
     * Builds {@code size} sample students for the insert benchmarks.
     * Names run from {@code zhangsan-0} through {@code zhangsan-(size-1)}, age is a random
     * int in [0, 40), the phone number is fixed and birthday is the moment of creation.
     *
     * @param size number of students to generate
     * @return a new list holding the generated students in creation order
     */
    public static List<Student> generateStudentData(int size){
        List<Student> generated = new ArrayList<>(size);
        Random ageSource = new Random();
        int index = 0;
        while (index < size) {
            Student current = new Student();
            current.setName("zhangsan-" + index);
            current.setAge(ageSource.nextInt(40));
            current.setPhone("15077828899");
            current.setBirthday(LocalDateTime.now());
            generated.add(current);
            index++;
        }
        return generated;
    }
}
StudentMapper 接口如下:
            
            
              java
              
              
            
          
          public interface StudentMapper extends BaseMapper<Student> {
    /**
     * Batch insert: inserts all given students through the {@code insertBatch}
     * statement of the mapper XML.
     *
     * @param entities students to insert; bound under the name {@code entities} for the mapper XML
     * @return int result of the mapped statement (presumably the affected-row count; confirm)
     */
    int insertBatch(@Param("entities") List<Student> entities);
    /**
     * Batch insert-or-update: inserts the given students, updating rows that already
     * exist instead of inserting them, through the {@code insertOrUpdateBatch}
     * statement of the mapper XML.
     *
     * @param entities students to insert or update; bound under the name {@code entities} for the mapper XML
     * @return int result of the mapped statement (presumably the affected-row count; confirm)
     */
    int insertOrUpdateBatch(@Param("entities") List<Student> entities);
}
StudentMapper.xml 内容如下:
            
            
              xml
              
              
            
          
          <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.efreight.oss.transfer.dao.StudentMapper">
    <!--
      Column-to-property mapping for Student.
      The primary key is declared with <id> rather than <result> so MyBatis treats it as the
      identifier of the row when comparing mapped objects (used by caching and nested mappings).
    -->
    <resultMap type="com.efreight.oss.transfer.entity.Student" id="StudentMap">
        <id property="id" column="id" jdbcType="INTEGER"/>
        <result property="name" column="name" jdbcType="VARCHAR"/>
        <result property="age" column="age" jdbcType="INTEGER"/>
        <result property="birthday" column="birthday" jdbcType="TIMESTAMP"/>
        <result property="phone" column="phone" jdbcType="VARCHAR"/>
    </resultMap>
    <!--
      Batch insert: emits a single multi-row INSERT with one value tuple per element of
      the "entities" parameter, separated by commas.
      useGeneratedKeys + keyProperty="id" ask MyBatis to write the generated key back into
      each entity's id property.
      NOTE(review): an empty "entities" list leaves "values" with no tuples, which is not
      valid SQL; callers should skip the call for empty lists.
    -->
    <insert id="insertBatch" keyProperty="id" useGeneratedKeys="true">
        insert into cargo.student(name, age, birthday, phone)
        values
        <foreach collection="entities" item="entity" separator=",">
        (#{entity.name}, #{entity.age}, #{entity.birthday}, #{entity.phone})
        </foreach>
    </insert>
    <!--
      Batch insert, or update by primary key: emits a single multi-row INSERT for the
      "entities" parameter; a row that collides with an existing key has its name, age,
      birthday and phone overwritten with the incoming values.
      The id column is part of the insert list so that a primary-key collision can actually
      occur. Without it every row would get a fresh auto-increment id and the update branch
      could only fire on some other unique index. Entities whose id is null are still
      inserted with a generated key.
      NOTE(review): with useGeneratedKeys, the ids written back for rows that were updated
      rather than inserted may not be meaningful; verify before relying on them.
      NOTE(review): an empty "entities" list leaves "values" with no tuples, which is not
      valid SQL; callers should skip the call for empty lists.
    -->
    <insert id="insertOrUpdateBatch" keyProperty="id" useGeneratedKeys="true">
        insert into cargo.student(id, name, age, birthday, phone)
        values
        <foreach collection="entities" item="entity" separator=",">
            (#{entity.id,jdbcType=INTEGER}, #{entity.name}, #{entity.age}, #{entity.birthday}, #{entity.phone})
        </foreach>
        on duplicate key update
        name = values(name) , age = values(age) , birthday = values(birthday) , phone = values(phone)
    </insert>
</mapper>
搭配 useGeneratedKeys="true" 和 keyProperty="id" 属性(这个 id 是 javaBean 的属性),可以获取自增主键,有时候这个主键我们是非常需要的。
3.批处理方式插入
通过 ExecutorType.BATCH 来构建一个可以完成批处理工作的执行器:
            
            
              java
              
              
            
          
          @RunWith(SpringRunner.class)
@SpringBootTest
public class BatchTest {
    @Resource
    private StudentMapper studentMapper;
    @Resource
    private SqlSessionTemplate sqlSessionTemplate;
    /**
     * Measures JDBC-batched inserts: opens a dedicated {@code ExecutorType.BATCH} session with
     * auto-commit off, queues one insert per student, flushes the queued statements after
     * every {@code batchSize} rows (and once for the remainder), commits, and prints the
     * elapsed milliseconds. The session is closed by try-with-resources.
     */
    @Test
    public void test2(){
        final int batchSize = 1000;
        List<Student> students = generateStudentData(100);
        long batchStart = System.currentTimeMillis();
        try (SqlSession sqlSession = this.sqlSessionTemplate.getSqlSessionFactory().openSession(ExecutorType.BATCH, false)) {
            // Mapper bound to the batch session; named differently from the injected field so it does not shadow it.
            StudentMapper batchMapper = sqlSession.getMapper(StudentMapper.class);
            int total = students.size();
            for (int i = 0; i < total; i++) {
                batchMapper.insert(students.get(i));
                // i is zero-based, so the number of queued rows is i + 1. Testing "i % batchSize == 0"
                // would flush right after the very first row and shift every later batch by one.
                if ((i + 1) % batchSize == 0 || i == total - 1) {
                    sqlSession.flushStatements();
                }
            }
            sqlSession.commit();
        }
        System.out.println("mybatis批处理插入: " + students.size() + " 条数据,共计耗时:" + (System.currentTimeMillis() - batchStart) + " 毫秒");
    }
    /**
     * Builds {@code size} sample students for the insert benchmarks.
     * Names run from {@code zhangsan-0} through {@code zhangsan-(size-1)}, age is a random
     * int in [0, 40), the phone number is fixed and birthday is the moment of creation.
     *
     * @param size number of students to generate
     * @return a new list holding the generated students in creation order
     */
    public static List<Student> generateStudentData(int size){
        List<Student> generated = new ArrayList<>(size);
        Random ageSource = new Random();
        int index = 0;
        while (index < size) {
            Student current = new Student();
            current.setName("zhangsan-" + index);
            current.setAge(ageSource.nextInt(40));
            current.setPhone("15077828899");
            current.setBirthday(LocalDateTime.now());
            generated.add(current);
            index++;
        }
        return generated;
    }
}
注意:批处理方式是无法获取自增主键的。
顺便说一下,现在使用 mybatis-plus 的也非常多,它也提供了批量插入的功能,它内部使用的就是 ExecutorType.BATCH 来构建的。

关于三者性能的比较大家可以跑下看看。循环插入的方式性能最差,能不用就尽量不用;在 foreach 和批处理中,我测试发现 foreach 的性能最好(我用的 MySQL 是 5.7),所以项目中我们批量插入使用的都是 foreach,因为我们一般批量插入最多也就 2000 条左右的数据。大家可以根据自己机器的实际情况,去跑一下看看。
| 插入方式 | 100条 | 1000条 | 10000条 | 10万条 | 
|---|---|---|---|---|
| 循环插入 | 1599 毫秒 | 14336 毫秒 | 140793 毫秒 | * | 
| foreach | 62 毫秒 | 364 毫秒 | 3249 毫秒 | 23940 毫秒 | 
| 批处理 | 321 毫秒 | 6868 毫秒 | 72851 毫秒 | 457005 毫秒 |