原始Atkin筛法来自pocketpy的benchmarks/primes.py
1.纯python版本
python
import time
from math import isqrt
def sieve_eratosthenes(limit):
"""
埃拉托斯特尼筛法求素数
返回素数列表
"""
if limit < 2:
return []
# 初始化布尔数组,True表示可能是素数
is_prime = [True] * (limit + 1)
is_prime[0] = is_prime[1] = False
# 只需筛到 sqrt(limit)
for i in range(2, isqrt(limit) + 1):
if is_prime[i]:
# 从 i*i 开始标记,因为更小的倍数已被更小的素数标记过
step = i
start = i * i
is_prime[start:limit+1:step] = [False] * ((limit - start) // step + 1)
# 收集素数
primes = [i for i, prime in enumerate(is_prime) if prime]
return primes
def sieve_atkin(limit):
"""
Atkin筛法求素数
返回素数列表
"""
if limit < 2:
return []
if limit == 2:
return [2]
if limit == 3:
return [2, 3]
# 初始化布尔数组,全部设为False
is_prime = [False] * (limit + 1)
# 处理小素数
is_prime[2] = True
is_prime[3] = True
# 三种二次型筛选
# 注意:代码中使用 x 和 y 的平方小于 limit 的条件,与原始Atkin筛法一致
limit_sqrt = isqrt(limit)
for x in range(1, limit_sqrt + 1):
xx = x * x
for y in range(1, limit_sqrt + 1):
yy = y * y
# 第一种形式: 4x^2 + y^2
n = 4 * xx + yy
if n <= limit and (n % 12 == 1 or n % 12 == 5):
is_prime[n] = not is_prime[n]
# 第二种形式: 3x^2 + y^2
n = 3 * xx + yy
if n <= limit and n % 12 == 7:
is_prime[n] = not is_prime[n]
# 第三种形式: 3x^2 - y^2
n = 3 * xx - yy
if x > y and n <= limit and n % 12 == 11:
is_prime[n] = not is_prime[n]
# 移除平方数的倍数
for r in range(5, limit_sqrt + 1):
if is_prime[r]:
rr = r * r
for multiple in range(rr, limit + 1, rr):
is_prime[multiple] = False
# 收集素数
primes = [i for i, prime in enumerate(is_prime) if prime]
return primes
def sum_of_primes(primes_list):
"""计算素数列表的和"""
return sum(primes_list)
def compare_algorithms(limits):
"""
比较埃拉托斯特尼筛法和Atkin筛法的性能
limits: 要测试的上限列表,例如 [1000000, 5000000, 10000000]
"""
print("=" * 80)
print(f"{'上限':<12} {'算法':<20} {'素数个数':<12} {'素数总和':<20} {'耗时(秒)':<12}")
print("=" * 80)
results = {}
for limit in limits:
# 测试埃拉托斯特尼筛法
start_time = time.perf_counter()
primes_era = sieve_eratosthenes(limit)
time_era = time.perf_counter() - start_time
sum_era = sum_of_primes(primes_era)
count_era = len(primes_era)
# 测试Atkin筛法
start_time = time.perf_counter()
primes_atkin = sieve_atkin(limit)
time_atkin = time.perf_counter() - start_time
sum_atkin = sum_of_primes(primes_atkin)
count_atkin = len(primes_atkin)
# 验证两种算法结果一致
assert count_era == count_atkin, f"素数个数不一致: {count_era} vs {count_atkin}"
assert sum_era == sum_atkin, f"素数和不一致: {sum_era} vs {sum_atkin}"
# 存储结果
results[limit] = {
'era': {'time': time_era, 'sum': sum_era, 'count': count_era},
'atkin': {'time': time_atkin, 'sum': sum_atkin, 'count': count_atkin}
}
# 打印结果
print(f"{limit:<12,} {'埃拉托斯特尼筛法':<20} {count_era:<12,} {sum_era:<20,} {time_era:<12.4f}")
print(f"{limit:<12,} {'Atkin筛法':<20} {count_atkin:<12,} {sum_atkin:<20,} {time_atkin:<12.4f}")
print(f"{'':<12} {'速度比(埃氏筛/Atkin)':<20} {'':<12} {'':<20} {time_era/time_atkin:<12.2f}x")
print("-" * 80)
return results
def quick_verify():
"""快速验证两种算法的正确性"""
test_limits = [100, 1000, 10000]
for limit in test_limits:
primes_era = sieve_eratosthenes(limit)
primes_atkin = sieve_atkin(limit)
assert primes_era == primes_atkin, f"算法结果不一致,上限={limit}"
print("✓ 验证通过:埃拉托斯特尼筛法和Atkin筛法结果一致")
print()
if __name__ == "__main__":
# 首先验证算法正确性
quick_verify()
# 定义要测试的上限
test_limits = [1_000_000, 5_000_000, 10_000_000] # 100万、500万、1000万
# 执行性能比较
compare_algorithms(test_limits)
执行结果
C:\d>python comp_prime.py
✓ 验证通过:埃拉托斯特尼筛法和Atkin筛法结果一致
================================================================================
上限 算法 素数个数 素数总和 耗时(秒)
================================================================================
1,000,000 埃拉托斯特尼筛法 78,498 37,550,402,023 0.0385
1,000,000 Atkin筛法 78,498 37,550,402,023 0.1868
速度比(埃氏筛/Atkin) 0.21 x
--------------------------------------------------------------------------------
5,000,000 埃拉托斯特尼筛法 348,513 838,596,693,108 0.2012
5,000,000 Atkin筛法 348,513 838,596,693,108 0.9766
速度比(埃氏筛/Atkin) 0.21 x
--------------------------------------------------------------------------------
10,000,000 埃拉托斯特尼筛法 664,579 3,203,324,994,356 0.4287
10,000,000 Atkin筛法 664,579 3,203,324,994,356 2.0092
速度比(埃氏筛/Atkin) 0.21 x
--------------------------------------------------------------------------------
2.numpy版本,Atkin筛法改为向量化算法,埃拉托斯特尼筛法的列表改用np数组。
python
import time
import numpy as np
from math import isqrt
def sieve_atkin_vectorized(limit):
"""
使用NumPy向量化的Atkin筛法求素数
返回素数列表
"""
if limit < 2:
return []
if limit == 2:
return [2]
if limit == 3:
return [2, 3]
# 使用int8类型数组记录翻转次数(模2)
is_prime = np.zeros(limit + 1, dtype=np.int8)
is_prime[2] = 1
is_prime[3] = 1
limit_sqrt = isqrt(limit)
# 创建x和y的网格
x = np.arange(1, limit_sqrt + 1, dtype=np.int32)
y = np.arange(1, limit_sqrt + 1, dtype=np.int32)
xx = x * x
yy = y * y
# 向量化计算 4x^2 + y^2
n1 = 4 * xx[:, np.newaxis] + yy[np.newaxis, :]
mask1 = (n1 <= limit) & ((n1 % 12 == 1) | (n1 % 12 == 5))
# 使用bincount统计每个n出现的次数(模2)
counts1 = np.bincount(n1[mask1], minlength=limit + 1)
is_prime = (is_prime + counts1) % 2
# 向量化计算 3x^2 + y^2
n2 = 3 * xx[:, np.newaxis] + yy[np.newaxis, :]
mask2 = (n2 <= limit) & (n2 % 12 == 7)
counts2 = np.bincount(n2[mask2], minlength=limit + 1)
is_prime = (is_prime + counts2) % 2
# 向量化计算 3x^2 - y^2 (需要 x > y)
X, Y = np.meshgrid(x, y, indexing='ij')
n3 = 3 * X * X - Y * Y
mask3 = (X > Y) & (n3 <= limit) & (n3 % 12 == 11)
counts3 = np.bincount(n3[mask3], minlength=limit + 1)
is_prime = (is_prime + counts3) % 2
# 转换为bool数组
is_prime_bool = is_prime.astype(bool)
# 移除平方数的倍数
for r in range(5, limit_sqrt + 1):
if is_prime_bool[r]:
rr = r * r
is_prime_bool[rr:limit+1:rr] = False
# 收集素数
primes = np.where(is_prime_bool)[0].tolist()
return primes
def sieve_eratosthenes(limit):
"""
埃拉托斯特尼筛法求素数(保持原实现)
返回素数列表
"""
if limit < 2:
return []
is_prime = np.ones(limit + 1, dtype=bool)
is_prime[0:2] = False
for i in range(2, isqrt(limit) + 1):
if is_prime[i]:
is_prime[i*i:limit+1:i] = False
primes = np.where(is_prime)[0].tolist()
return primes
def sum_of_primes(primes_list):
"""计算素数列表的和"""
return sum(primes_list)
def compare_algorithms(limits):
"""
比较埃拉托斯特尼筛法和向量化Atkin筛法的性能
limits: 要测试的上限列表
"""
print("=" * 80)
print(f"{'上限':<12} {'算法':<25} {'素数个数':<12} {'素数总和':<20} {'耗时(秒)':<12}")
print("=" * 80)
for limit in limits:
# 测试埃拉托斯特尼筛法
start_time = time.perf_counter()
primes_era = sieve_eratosthenes(limit)
time_era = time.perf_counter() - start_time
sum_era = sum_of_primes(primes_era)
count_era = len(primes_era)
# 测试向量化Atkin筛法
start_time = time.perf_counter()
primes_atkin = sieve_atkin_vectorized(limit)
time_atkin = time.perf_counter() - start_time
sum_atkin = sum_of_primes(primes_atkin)
count_atkin = len(primes_atkin)
# 验证结果一致
assert count_era == count_atkin, f"素数个数不一致: {count_era} vs {count_atkin}"
assert sum_era == sum_atkin, f"素数和不一致: {sum_era} vs {sum_atkin}"
# 打印结果
print(f"{limit:<12,} {'埃拉托斯特尼筛法':<25} {count_era:<12,} {sum_era:<20,} {time_era:<12.4f}")
print(f"{limit:<12,} {'Atkin筛法(向量化)':<25} {count_atkin:<12,} {sum_atkin:<20,} {time_atkin:<12.4f}")
print(f"{'':<12} {'速度比(埃氏筛/Atkin)':<25} {'':<12} {'':<20} {time_era/time_atkin:<12.2f}x")
print("-" * 80)
def quick_verify():
"""快速验证向量化Atkin筛法的正确性"""
test_limits = [100, 1000, 10000]
for limit in test_limits:
primes_era = sieve_eratosthenes(limit)
primes_atkin = sieve_atkin_vectorized(limit)
#print(primes_atkin)
assert primes_era == primes_atkin, f"算法结果不一致,上限={limit}"
print("✓ 验证通过:埃拉托斯特尼筛法和向量化Atkin筛法结果一致")
print()
if __name__ == "__main__":
# 验证算法正确性
quick_verify()
# 定义要测试的上限(根据内存调整,500万可能内存较大)
test_limits = [1_000_000, 5_000_000, 10_000_000] # 100万、500万
# 注意:1000万时n1网格约3162x3162=1000万个元素,内存约80MB,可以根据机器内存调整
# 执行性能比较
compare_algorithms(test_limits)
执行结果
C:\d>python comp_prime2.py
✓ 验证通过:埃拉托斯特尼筛法和向量化Atkin筛法结果一致
================================================================================
上限 算法 素数个数 素数总和 耗时(秒)
================================================================================
1,000,000 埃拉托斯特尼筛法 78,498 37,550,402,023 0.0057
1,000,000 Atkin筛法(向量化) 78,498 37,550,402,023 0.0411
速度比(埃氏筛/Atkin) 0.14 x
--------------------------------------------------------------------------------
5,000,000 埃拉托斯特尼筛法 348,513 838,596,693,108 0.0159
5,000,000 Atkin筛法(向量化) 348,513 838,596,693,108 0.1987
速度比(埃氏筛/Atkin) 0.08 x
--------------------------------------------------------------------------------
10,000,000 埃拉托斯特尼筛法 664,579 3,203,324,994,356 0.0283
10,000,000 Atkin筛法(向量化) 664,579 3,203,324,994,356 0.4150
速度比(埃氏筛/Atkin) 0.07 x
--------------------------------------------------------------------------------