在前文的基础上,我好奇作为事务型数据库的SQLite表现如何,于是请DeepSeek帮忙编写测试。
提示词
请仿照附件编写用python插入sqlite数据的测试函数,如果sqlite3没有对应方法就省略
python
import sqlite3
import pandas as pd
import time
# Display names of the insert strategies. A strategy's position in this list
# is the integer ``method`` code that test_sqlite_insert dispatches on.
methods = [
    '字符串拼接',
    '绑定变量循环',
    'executemany批量',
    'VALUES批量',
    'DataFrame插入',
]
# Display names of the two commit modes, indexed by the ``tran`` flag (0 or 1).
trans = [
    '自动提交',
    '事务',
]
def test_sqlite_insert(method, n, tran):
    """Time one strategy for inserting ``n`` integers into an in-memory SQLite table.

    A fresh ``:memory:`` database with table ``t(a INT)`` is created for every
    call and closed again before returning, even when the insert fails.

    Args:
        method: Strategy code, also used as an index into the module-level
            ``methods`` list for the report line:
            0 = one INSERT per row with the value formatted into the SQL text,
            1 = one parameterised INSERT per row,
            2 = a single ``executemany`` call,
            3 = multi-row ``INSERT ... VALUES (..),(..)`` in batches of 1000,
            4 = ``DataFrame.to_sql`` in append mode.
        n: Number of rows to insert (the values are ``0 .. n-1``).
        tran: 0 to run in autocommit mode, 1 to wrap the inserts in one
            explicit transaction; also indexes the module-level ``trans`` list.

    Returns:
        Elapsed seconds for the inserts (including the final commit when
        ``tran`` is 1), rounded to two decimals.

    Raises:
        Exception: Whatever the insert raised, re-raised unchanged after the
            explicit transaction (if any) has been rolled back.
    """
    # isolation_level=None switches the sqlite3 module to real autocommit.
    # With the default setting the module silently opens a transaction before
    # the first INSERT, so the "autocommit" case would measure an implicit,
    # never-committed transaction instead.
    conn = sqlite3.connect(":memory:", isolation_level=None)
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE t(a INT)")
        # Build only the input the chosen strategy needs, and do it before the
        # timer starts so data preparation is not counted as insert time.
        data = [(i,) for i in range(n)] if method == 2 else None
        df = pd.DataFrame({'a': range(n)}) if method == 4 else None
        if tran == 1:
            conn.execute("BEGIN")
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        try:
            if method == 0:  # value formatted into the SQL text
                # Values are integers generated here, never external input.
                for i in range(n):
                    cursor.execute(f"INSERT INTO t VALUES ({i})")
            elif method == 1:  # bound parameter, one statement per row
                for i in range(n):
                    cursor.execute("INSERT INTO t VALUES (?)", (i,))
            elif method == 2:  # executemany over the prepared tuples
                cursor.executemany("INSERT INTO t VALUES (?)", data)
            elif method == 3:  # multi-row VALUES lists, 1000 rows per statement
                batch_size = 1000
                for i in range(0, n, batch_size):
                    batch = range(i, min(i+batch_size, n))
                    values = ",".join(f"({x})" for x in batch)
                    cursor.execute(f"INSERT INTO t VALUES {values}")
            elif method == 4:  # pandas DataFrame appended to the existing table
                df.to_sql('t', conn, if_exists='append', index=False)
            if tran == 1:
                conn.commit()
        except Exception:
            if tran == 1:
                conn.rollback()
            # Bare raise keeps the original traceback intact.
            raise
        elapsed = round(time.perf_counter() - start_time, 2)
        print(f"{trans[tran]} {methods[method]}:{elapsed}s")
        return elapsed
    finally:
        conn.close()
def run_sqlite_tests(n=10000):
    """Run every SQLite insert strategy in every commit mode and print the timings.

    After the strategy/mode matrix, one extra case is timed on its own
    connection: multi-row ``INSERT ... VALUES`` statements in batches of 1000
    followed by a single commit.

    Args:
        n: Number of rows inserted by each test case.
    """
    print(f"\nSQLite 插入性能测试 (n={n})")
    print("="*40)
    for m in range(len(methods)):
        # Iterate over the declared commit modes instead of a hard-coded 2.
        for t in range(len(trans)):
            test_sqlite_insert(m, n, t)
    # Extra case: batched multi-row INSERT statements, committed once at the end.
    conn = sqlite3.connect(":memory:")
    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE t(a INT)")
        start_time = time.perf_counter()
        batch_size = 1000
        for i in range(0, n, batch_size):
            batch = range(i, min(i+batch_size, n))
            values = ",".join(f"({x})" for x in batch)
            cursor.execute(f"INSERT INTO t VALUES {values}")
        conn.commit()
        elapsed = round(time.perf_counter() - start_time, 2)
        print(f"单条SQL批量插入(分批次):{elapsed}s")
    finally:
        # Release the connection even if an insert fails.
        conn.close()
# Run the SQLite benchmark only when this file is executed as a script.
if __name__ == "__main__":
    run_sqlite_tests()
提示词
请为duckdb也增加INSERT INTO t VALUES(1),(2),(3)...这种批量方法的测试
python
import duckdb
import pandas as pd
import time
# Display names of the insert strategies. A strategy's position in this list
# is the integer ``method`` code that test_duckdb_insert dispatches on.
methods = [
    '字符串拼接',
    '绑定变量循环',
    '绑定变量批量',
    'VALUES批量',
    'DataFrame插入',
]
# Display names of the two commit modes, indexed by the ``tran`` flag (0 or 1).
trans = [
    '自动提交',
    '事务',
]
def test_duckdb_insert(method, n, con, tran):
    """Time one strategy for inserting ``n`` integers into DuckDB table ``t``.

    The table is (re)created with ``CREATE OR REPLACE TABLE t(a INT)`` at the
    start of every call, so each run begins with an empty table.

    Args:
        method: Strategy code, also used as an index into the module-level
            ``methods`` list for the report line:
            0 = one ``INSERT ... SELECT <literal>`` per row,
            1 = one parameterised INSERT per row,
            2 = a single ``executemany`` call,
            3 = multi-row ``INSERT ... VALUES (..),(..)`` in batches of 1000,
            4 = register a pandas DataFrame and ``INSERT ... SELECT`` from it.
        n: Number of rows to insert (the values are ``0 .. n-1``).
        con: Open DuckDB connection; it is shared with the caller and left open.
        tran: 0 to run without an explicit transaction, 1 to wrap the inserts
            in ``BEGIN TRANSACTION`` / ``COMMIT``; also indexes the
            module-level ``trans`` list.

    Returns:
        Elapsed seconds for the inserts (including the COMMIT when ``tran`` is
        1), rounded to two decimals.

    Raises:
        Exception: Whatever the insert raised, re-raised unchanged after the
            explicit transaction (if any) has been rolled back.
    """
    con.execute("CREATE OR REPLACE TABLE t(a INT)")
    # Build only the input the chosen strategy needs, before the timer starts,
    # so data preparation is not counted as insert time (same as the SQLite
    # benchmark).
    data = [[i] for i in range(n)] if method == 2 else None
    df = pd.DataFrame({'a': range(n)}) if method == 4 else None
    if tran == 1:
        con.execute("BEGIN TRANSACTION")
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    try:
        if method == 0:  # literal concatenated into the SQL text
            # Values are integers generated here, never external input.
            for i in range(n):
                con.execute("INSERT INTO t SELECT "+str(i))
        elif method == 1:  # bound parameter, one statement per row
            for i in range(n):
                con.execute("INSERT INTO t VALUES (?)", [i])
        elif method == 2:  # executemany over the prepared parameter sets
            # Nothing to insert for n == 0; skip the call rather than hand
            # executemany an empty parameter list.
            if data:
                con.executemany("INSERT INTO t VALUES (?)", data)
        elif method == 3:  # multi-row VALUES lists, 1000 rows per statement
            batch_size = 1000
            for i in range(0, n, batch_size):
                batch = range(i, min(i+batch_size, n))
                values = ",".join(f"({x})" for x in batch)
                con.execute(f"INSERT INTO t VALUES {values}")
        elif method == 4:  # insert from a registered pandas DataFrame
            con.register('temp_df', df)
            try:
                con.execute("INSERT INTO t SELECT a FROM temp_df")
            finally:
                # Do not leave the view registered on the shared connection.
                con.unregister('temp_df')
    except Exception:
        # The connection is reused by later test cases; an open transaction
        # left behind here would make their BEGIN TRANSACTION fail.
        if tran == 1:
            con.execute("ROLLBACK")
        raise
    else:
        if tran == 1:
            con.execute("COMMIT")
    elapsed = round(time.perf_counter() - start_time, 2)
    print(f"{trans[tran]} {methods[method]}:{elapsed}s")
    return elapsed
def run_duckdb_tests(n=10000):
    """Run every DuckDB insert strategy in every commit mode and print the timings.

    All cases share one in-memory connection. After the strategy/mode matrix,
    one extra case is timed: filling the table from DuckDB's ``range()`` table
    function in a single ``INSERT ... SELECT``.

    Args:
        n: Number of rows inserted by each test case.
    """
    con = duckdb.connect(database=":memory:")
    try:
        print(f"\nDuckDB 插入性能测试 (n={n})")
        print("="*40)
        for m in range(len(methods)):
            # Iterate over the declared commit modes instead of a hard-coded 2.
            for t in range(len(trans)):
                test_duckdb_insert(m, n, con, t)
        # Extra case: generate the rows inside DuckDB with range().
        # The table is recreated before the timer starts so that, as in the
        # other cases, only the insert itself is measured.
        con.execute("CREATE OR REPLACE TABLE t(a INT)")
        start_time = time.perf_counter()
        con.execute(f"INSERT INTO t SELECT i FROM range({n}) t(i)")
        elapsed = round(time.perf_counter() - start_time, 2)
        print(f"使用range函数批量插入:{elapsed}s")
    finally:
        # Release the connection even if a test case fails.
        con.close()
# Run the DuckDB benchmark only when this file is executed as a script.
if __name__ == "__main__":
    run_duckdb_tests()
测试结果如下
bash
python pysqlite3.txt
SQLite 插入性能测试 (n=10000)
========================================
自动提交 字符串拼接:0.08s
事务 字符串拼接:0.08s
自动提交 绑定变量循环:0.02s
事务 绑定变量循环:0.03s
自动提交 executemany批量:0.02s
事务 executemany批量:0.02s
自动提交 VALUES批量:0.02s
事务 VALUES批量:0.02s
自动提交 DataFrame插入:0.02s
事务 DataFrame插入:0.02s
单条SQL批量插入(分批次):0.02s
python pyduckdb3.txt
DuckDB 插入性能测试 (n=10000)
========================================
自动提交 字符串拼接:5.53s
事务 字符串拼接:4.2s
自动提交 绑定变量循环:6.22s
事务 绑定变量循环:5.02s
自动提交 绑定变量批量:2.99s
事务 绑定变量批量:1.79s
自动提交 VALUES批量:0.08s
事务 VALUES批量:0.08s
自动提交 DataFrame插入:0.01s
事务 DataFrame插入:0.0s
使用range函数批量插入:0.0s
可见,SQLite的单行插入性能比DuckDB高出很多,基本相差2个数量级。SQLite使用绑定变量比字符串拼接也有明显提升,其executemany批量插入与在VALUES中列举多个值的批量插入效果相当。DuckDB的VALUES批量插入效率也比较高。我原先不知道还有这种方法,DeepSeek的知识还真全面。pandas的DataFrame插入在两种数据库中都很高效。