R/d2及S/C4估计总体标准差，比较其CPK及规格限概率的差异

R/d2 和 S/C4 是用于估计总体标准差的无偏估计方法，通常用于控制图中。这些估计方法的主要目的是通过样本数据来估计总体标准差，以便监测过程的稳定性和变异性，而不需要收集整个总体的数据。

具体来说：

R图中的 R/d2 和 S图中的 S/C4 都是无偏估计，其中 d2 和 C4 是常数，用于修正范围和标准差以获得更接近总体标准差的估计值。

python 复制代码

import numpy as np
import scipy.stats as stats
from scipy.stats import norm
import matplotlib.pyplot as plt
# 输入数据
data = [
    [6.4, 7.0, 6.4, 6.4, 7.1],
    [6.8, 6.4, 6.4, 6.3, 6.5],
    [6.3, 7.1, 6.5, 6.4, 7.0],
    [6.1, 6.8, 5.9, 5.8, 6.0],
    [6.4, 6.9, 6.8, 6.5, 6.9],
    [6.6, 6.0, 6.1, 6.2, 5.9],
    [6.3, 6.9, 6.6, 6.2, 6.8],
    [6.4, 5.6, 6.2, 6.0, 5.8],
    [6.3, 6.7, 6.6, 6.4, 6.3],
    [6.7, 5.9, 5.8, 6.3, 6.2],
    [6.6, 7.0, 6.5, 6.4, 7.1],
    [6.8, 6.2, 6.5, 6.2, 5.8]
]
# data1 输入过程上下限
upper_spec_limit = 7
lower_spec_limit = 5.8


data1 = [
    [5.05, 5.03, 5.04, 4.91, 4.99, 4.97, 4.97, 4.91, 4.97, 5.05],
    [4.96, 4.94, 4.93, 4.91, 5.04, 5.01, 4.96, 4.93, 4.90, 5.02],
    [4.99, 5.05, 5.04, 5.05, 5.00, 4.98, 4.90, 5.05, 4.91, 4.93],
    [5.05, 4.93, 4.91, 4.99, 4.92, 4.91, 5.04, 4.98, 5.03, 5.01],
    [5.05, 5.00, 4.92, 4.93, 5.00, 4.98, 5.10, 4.95, 4.91, 4.98],
    [5.02, 5.01, 5.01, 4.91, 5.05, 4.97, 5.03, 5.03, 4.98, 5.06],
    [4.91, 5.02, 5.07, 5.05, 5.08, 5.01, 5.03, 5.03, 5.07, 4.96],
    [4.95, 4.98, 4.91, 5.03, 5.03, 5.08, 4.92, 5.09, 5.10, 5.00],
    [4.98, 4.97, 4.97, 5.09, 5.03, 5.07, 5.08, 4.96, 4.96, 5.03],
    [4.99, 4.91, 5.02, 4.97, 5.04, 4.99, 5.05, 4.93, 5.05, 4.92]
]  #检测不符合正态，凑合着演示用
# data1 输入过程上下限
# upper_spec_limit = 5.2
# lower_spec_limit = 4.8

# 3. Shapiro-Wilk检验
shapiro_stat, shapiro_p = stats.shapiro(data)
print("\nShapiro-Wilk检验统计值:", shapiro_stat)
print("Shapiro-Wilk检验p-value:", shapiro_p)
if shapiro_p > 0.05:
    print("数据可能来自正态分布")
else:
    print("数据可能不来自正态分布")


# 选择适当的d2及C4 值（与样本容量有关）
sample_size = len(data[0])  # 假设所有组的样本容量相同
# 根据样本容量选择 C4 值
C4_values = {
    5: 0.9400,
    6: 0.9515,
    7: 0.9594,
    8: 0.9650,
    9: 0.9693,
    10: 0.9727,
}
d2_values = {
5: 2.326, 
6: 2.534,
7: 2.704, 
8: 2.847, 
9: 2.970, 
10: 3.078, 
}

print("sample_size子组样本容量: ", sample_size)
C4 = C4_values.get(sample_size, None)
print("根据子组样本容量选择C4值: ",C4)
d2 = d2_values.get(sample_size, None)
print("根据子组样本容量选择d2值: ",d2)


# 计算data整体标准差
population_std = np.std(data)

# 计算data样本标准差
sample_std = np.std(data, ddof=1)  # 使用ddof参数来指定自由度

print("\ndata整体标准差:", population_std)
print("data样本标准差:", sample_std)


# 计算X-bar图的x_double_bar中心线均值,用于绘制概率密度曲线
x_double_bar = np.mean([np.mean(subgroup) for subgroup in data]) #x_double_bar中心线均值


# 计算R/d2估计的总体标准差
r_values = [max(subgroup) - min(subgroup) for subgroup in data]
r_bar = np.mean(r_values)
sigma_r = r_bar / d2

# 计算S/C4估计的总体标准差
s_values = [np.std(subgroup, ddof=1) for subgroup in data]
s_bar = np.mean(s_values)
sigma_s = s_bar / C4


# 计算CPK, 其中x_double_bar中心线均值
cpk_r = min((upper_spec_limit - x_double_bar) / (3 * sigma_r), (x_double_bar - lower_spec_limit) / (3 * sigma_r))
cpk_s = min((upper_spec_limit - x_double_bar) / (3 * sigma_s), (x_double_bar - lower_spec_limit) / (3 * sigma_s))

print("\n通过R/d2估计的总体标准差 (σ):", sigma_r)
print("通过S/C4估计的总体标准差 (σ):", sigma_s)
print("R/d2法计算的CPK:", cpk_r)
print("S/C4法计算的CPK:", cpk_s)


# 计算标准分数
z_upper_r = (upper_spec_limit - x_double_bar) / sigma_r  # 使用R/d2估计的σ
z_lower_r = (lower_spec_limit - x_double_bar) / sigma_r  # 使用R/d2估计的σ

z_upper_s = (upper_spec_limit - x_double_bar) / sigma_s  # 使用S/C4估计的σ
z_lower_s = (lower_spec_limit - x_double_bar) / sigma_s  # 使用S/C4估计的σ


# 计算在规格限内的概率（使用R/d2估计的σ）
probability_within_spec_r = norm.cdf(z_upper_r) - norm.cdf(z_lower_r)

# 计算在规格限内的概率（使用S/C4估计的σ）
probability_within_spec_s = norm.cdf(z_upper_s) - norm.cdf(z_lower_s)

print("\n使用R/d2法估计的概率(在规格限内):", probability_within_spec_r)
print("使用S/C4法估计的概率(在规格限内):", probability_within_spec_s)




# 将数据展开为一维数组,用于画data数据集直方图
data_flat = [item for sublist in data for item in sublist]
#  flat_data = np.concatenate(data) # 扁平化数据


# 绘制整体数据集的直方图并叠加概率密度曲线，标准差用sigma_r=R/d2估计
plt.figure(figsize=(5, 5))
plt.hist(data_flat, bins=12, density=True, alpha=0.6, color='b', label='Histogram')
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, x_double_bar, sigma_r)
plt.plot(x, p, 'r', linewidth=2, label='PDF (Population)')
plt.axvline(x_double_bar, color='r', linestyle='--', label='X-Bar̄')
plt.axvline(upper_spec_limit, color='b', linestyle='-', label='USL')
plt.axvline(lower_spec_limit, color='b', linestyle='-', label='LSL')
plt.xlabel('Value')
plt.ylabel('Probability')
plt.title('Histogram (Population)')
plt.show()

Shapiro-Wilk检验统计值: 0.9730015993118286

Shapiro-Wilk检验p-value: 0.20416395366191864
数据可能来自正态分布

sample_size子组样本容量: 5

根据子组样本容量选择C4值: 0.94

根据子组样本容量选择d2值: 2.326

data整体标准差: 0.3711094477673968
data样本标准差: 0.37424122858811426

通过R/d2估计的总体标准差 (σ): 0.3116938950988822
通过S/C4估计的总体标准差 (σ): 0.3245199375603761
R/d2法计算的CPK: 0.6238314176245208
S/C4法计算的CPK: 0.5991756497496196

使用R/d2法估计的概率(在规格限内): 0.9454219702532735
使用S/C4法估计的概率(在规格限内): 0.9351733645056909

Finished in 13.9s

绘制X-Bar和R图，及数据集直方图概率密度曲线

python 复制代码

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# 输入数据
data = [
    [6.4, 7.0, 6.4, 6.4, 7.1],
    [6.8, 6.4, 6.4, 6.3, 6.5],
    [6.3, 7.1, 6.5, 6.4, 7.0],
    [6.1, 6.8, 5.9, 5.8, 6.0],
    [6.4, 6.9, 6.8, 6.5, 6.9],
    [6.6, 6.0, 6.1, 6.2, 5.9],
    [6.3, 6.9, 6.6, 6.2, 6.8],
    [6.4, 5.6, 6.2, 6.0, 5.8],
    [6.3, 6.7, 6.6, 6.4, 6.3],
    [6.7, 5.9, 5.8, 6.3, 6.2],
    [6.6, 7.0, 6.5, 6.4, 7.1],
    [6.8, 6.2, 6.5, 6.2, 5.8]
]

# 输入过程上下限
upper_spec_limit = 7
lower_spec_limit = 5.5

# 控制图参数
A2 = 0.577
D4 = 2.113
D3 = 0
d2 = 2.326  # 从表格或标准文献中查找



# 计算X-Bar和R
x_bar = np.mean(data, axis=1)
r_values = np.ptp(data, axis=1)

# 计算X-Bar和R的平均值
x_bar_bar = np.mean(x_bar)
r_bar = np.mean(r_values)
# r_values = [max(subgroup) - min(subgroup) for subgroup in data] #极差均值
# x_double_bar = np.mean([np.mean(subgroup) for subgroup in data]) #x_bar_bar中心线均值


# 将数据展开为一维数组,用于画data数据集直方图
data_flat = [item for sublist in data for item in sublist]
# 计算整体标准差
# population_std = np.std(data_flat)
population_std = r_bar / d2


# 计算UCL和LCL (X-Bar)
UCL_x_bar = x_bar_bar + A2 * r_bar
LCL_x_bar = x_bar_bar - A2 * r_bar

# 计算UCL和LCL (R)
UCL_r = D4 * r_bar
LCL_r = D3 * r_bar

# 绘制X-Bar控制图
plt.figure(figsize=(6, 6))
plt.subplot(3, 1, 1)

plt.plot(x_bar, 'o-', label='X-Bar')
plt.axhline(x_bar_bar, color='r', linestyle='--', label='X-Bar̄')
plt.axhline(UCL_x_bar, color='g', linestyle='--', label='UCL(X-Bar)')
plt.axhline(LCL_x_bar, color='g', linestyle='--', label='LCL(X-Bar)')
plt.ylabel('X-Bar')
# plt.legend()

plt.subplot(3, 1, 2)
plt.plot(r_values, 'o-', color='b', label='R')
plt.axhline(r_bar, color='r', linestyle='--', label='R̄')
plt.axhline(UCL_r, color='g', linestyle='--', label='UCL(R)')
plt.axhline(LCL_r, color='g', linestyle='--', label='LCL(R)')
# plt.xlabel('Sample')
plt.ylabel('R')
# plt.legend()
# plt.title('X-Bar-R')

# 绘制整体数据集的直方图并叠加概率密度曲线
plt.subplot(3, 1, 3)
plt.hist(data_flat, bins=12, density=True, alpha=0.6, color='b', label='Histogram')
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, np.mean(data_flat), population_std)
plt.plot(x, p, 'k', linewidth=2, label='PDF (Population)')
plt.axvline(x_bar_bar, color='r', linestyle='--', label='X-Bar̄')
plt.axvline(UCL_x_bar, color='g', linestyle='--', label='UCL(X-Bar)')
plt.axvline(LCL_x_bar, color='g', linestyle='--', label='LCL(X-Bar)')
plt.axvline(upper_spec_limit, color='b', linestyle='-', label='USL')
plt.axvline(lower_spec_limit, color='b', linestyle='-', label='LSL')
plt.xlabel('Value')
plt.ylabel('Probability')
# plt.legend()
plt.title('Histogram (Population)')
plt.tight_layout()
plt.show()


print("np.std(data_flat)估计总体标准差",np.std(data_flat))
print("r_bar/d2估计总体标准差",r_bar / d2)