-- 如何快速生成一张表数据 stack()
-- 例如:生成两行两列的数据
--------------------------------
--| 男 | M |
--------------------------------
--| 女 | F |
-----------------------------
-- 快速生成表数据的函数:stack(N,数据列表)
-- N 表示生成几行数
-- 数据列表:用于放置每一行的数据,函数内部会自动列表平均分为N分,放置到每一行中
-- stack(2, ...) spreads the four values evenly over 2 rows, i.e. 2 columns per row.
select stack(2, '男', 'M', '女', 'F');
-- How can this result be kept for later use?
-- Option 1: wrap the statement in a subquery and use it as a temporary result set.
-- Every value must be its own comma-separated argument: quoted strings written back to
-- back (e.g. '男''M') are parsed as a single string value, not as separate arguments.
select
    *
from (
    select stack(2, '男', 'M', '女', 'F')
) as t1;
-- Option 2: materialise the generated rows into a table.
create table if not exists t3 as
select stack(2, '男', 'M', '女', 'F');
2.3 回顾窗口函数
sql复制代码
-- 窗口函数: over(partition by xxx order by xxx [desc | asc] [rows between xxx and xxx])
-- 1- row_number() rank() dense_rank() ntile()
-- 2- 与聚合函数配合使用: sum() avg() max() min() count()
-- 3- lag() lead() first_value() last_value()
--测试数据
create or replace temporary view t1 (id,name,score)as
values (1,'张三',90),
(1,'李四',80),
(1,'玉五',85),
(1,'赵六',80),
(1,'田七',76),
(2,'周八',90),
(2,'李九',80),
(2,'老王',60);
select * from t1;
-- 1- row_number rank dense_rank ntile
select
id,
name,
score,
row_number() over (partition by id order by score) as rank1,
rank() over (partition by id order by score) as rank2,
dense_rank() over (partition by id order by score) as rank3,
ntile(3) over (partition by id order by score) as rank4
from t1;
select
id,
name,
score,
sum(score) over (partition by id order by score) as rank1,
sum(score) over (partition by id order by score rows between unbounded preceding and current row ) as rank2,
sum(score) over (partition by id) as rank3,
sum(score) over (partition by id order by score rows between unbounded preceding and unbounded following ) as rank4
from t1;
/*
lag(col, N, default):  value of col from the row N rows BEFORE the current row (default if there is none)
lead(col, N, default): value of col from the row N rows AFTER the current row (default if there is none)
first_value(col):      value of col from the first row of the window frame
last_value(col):       value of col from the last row of the window frame.
    With an ORDER BY and no explicit frame, the frame stops at the current row (and its ties),
    so last_value does not reach the end of the partition.
    To sort AND still compare against the last row of the partition, widen the frame
    to the whole partition:
        rows between unbounded preceding and unbounded following
*/
select
id,
name,
score,
lag(score,2,100) over (partition by id order by score) as rank1,
lead(score,2,100) over (partition by id order by score) as rank2,
first_value(score) over (partition by id order by score) as rank3,
last_value(score) over (partition by id) as rank4
from t1;
3. 迭代计算
3.1 横向迭代计算
sql复制代码
-- 演示 如何进行 横向迭代计算操作:
-- 需求:已知 c1列数据,计算出 c2 和 c3列数据
-- c1 有1,2,3 c2: c1+2 c3: c1*(c2+3)
-- 初始化数据
create or replace temporary view t1 as
select explode(`array`(1,2,3)) as c1;
--计算c2
with t2 as (
select
c1,
c1+2 as c2
from t1
)
--计算c3
select
c1,
c2,
c1*(c2+3) as c3
from t2;
-- 此脚本用于计算保费信息:
-- 1- 先生成维度表信息(19338种)
-- 性别:
create or replace view insurance_dw.prem_src0_sex as
select stack(2,'M','F') as sex;
-- 缴费期: 10 15 20 30
create or replace view insurance_dw.prem_src0_ppp as
select stack(4,10,15,20,30) as ppp;
-- 投保年龄: 18~60
create or replace view insurance_dw.prem_src0_age_buy as
select explode(sequence(18,60)) as age_buy;
-- 保单年度:
create or replace view insurance_dw.prem_src0_policy_year as
select explode(sequence(1,88)) as policy_year;
-- 构建一个常量标准数据表:
create or replace view insurance_dw.input as
select 0.035 interest_rate, --预定利息率(Interest Rate PREM&RSV)
0.055 interest_rate_cv,--现金价值预定利息率(Interest Rate CV)
0.0004 acci_qx,--意外身故死亡发生率(Accident_qx)
0.115 rdr,--风险贴现率(Risk Discount Rate)
10000 sa,--基本保险金额(Baisc Sum Assured)
1 average_size,--平均规模(Average Size)
1 MortRatio_Prem_0,--Mort Ratio(PREM)
1 MortRatio_RSV_0,--Mort Ratio(RSV)
1 MortRatio_CV_0,--Mort Ratio(CV)
1 CI_RATIO,--CI Ratio
6 B_time1_B,--生存金给付时间(1)---begain
59 B_time1_T,--生存金给付时间(1)-terminate
0.1 B_ratio_1,--生存金给付比例(1)
60 B_time2_B,--生存金给付时间(2)-begain
106 B_time2_T,--生存金给付时间(2)-terminate
0.1 B_ratio_2,--生存金给付比例(2)
70 MB_TIME,--祝寿金给付时间
0.2 MB_Ration,--祝寿金给付比例
0.7 RB_Per,--可分配盈余分配给客户的比例
0.7 TB_Per,--未分配盈余分配给客户的比例
1 Disability_Ratio,--残疾给付保险金保额倍数
0.1 Nursing_Ratio,--长期护理保险金保额倍数
75 Nursing_Age--长期护理保险金给付期满年龄
;
-- 组装四个维度:
create or replace view insurance_dw.prem_src0 as
select
t3.age_buy,
t5.Nursing_Age,
t1.sex,
t5.B_time2_T as t_age,
t2.ppp,
t5.B_time2_T - t3.age_buy as bpp,
t5.interest_rate,
t5.sa,
t4.policy_year,
(t3.age_buy + t4.policy_year) - 1 as age
from insurance_dw.prem_src0_sex t1 join insurance_dw.prem_src0_ppp t2 on 1=1
join insurance_dw.prem_src0_age_buy t3 on t3.age_buy >= 18 and t3.age_buy <= 70 - t2.ppp
join insurance_dw.prem_src0_policy_year t4 on t4.policy_year >=1 and t4.policy_year <= 106 - t3.age_buy
join insurance_dw.input as t5 on 1=1;
-- 校验维度表
select * from insurance_dw.prem_src0;
3. 保费参数因子表计算
开启精度保护
--开启spark精度保护
set spark.sql.decimalOperations.allowPrecisionLoss=false;
3.1 完成步骤一
sql复制代码
-- 步骤一:计算 ppp_ 和 bpp_
create or replace view insurance_dw.prem_src1 as
select *,
if(policy_year <= ppp, 1, 0) as ppp_,
if(policy_year <= bpp, 1, 0) as bpp_
from insurance_dw.prem_src0;
--校验
select *
from insurance_dw.prem_src1
where age_buy = 45
and sex = 'F'
and ppp = 15;
4.3.2 完成步骤二
sql复制代码
--步骤二:qx kx 和 qx_ci
create or replace view insurance_dw.prem_src2 as
select t1.*,
cast((
if(
t1.age <= 105,
if(t1.sex = 'M', t3.cl1, t3.cl3),
0)
) * t2.MortRatio_Prem_0 * t1.bpp_ as decimal(17, 8)) as qx,
(
if(
t1.age <= 105,
if(t1.sex = 'M', t4.k_male, t4.k_female),
0)
) * t1.bpp_ as kx,
(
if(
t1.sex = 'M',
t4.male,
t4.female)
) * t1.bpp_ as qx_ci
from insurance_dw.prem_src1 t1
join insurance_dw.input t2 on 1 = 1
join insurance_ods.mort_10_13 t3 on t1.age = t3.age
join insurance_ods.dd_table t4 on t1.age = t4.age;
--校验
select *
from insurance_dw.prem_src2
where age_buy = 45
and sex = 'F'
and ppp = 15;
4.3.3 完成步骤三
sql复制代码
-- Step 3: qx_d
-- The result is stored as view insurance_dw.prem_src3 because the next step (lx)
-- selects from that view; a bare SELECT would leave nothing for it to read.
-- For age >= 105 qx_d is qx - qx_ci, otherwise qx * (1 - kx); both are multiplied by bpp_.
create or replace view insurance_dw.prem_src3 as
select
    t1.*,
    (
        if(t1.age >= 105, t1.qx - t1.qx_ci, t1.qx * (1 - t1.kx))
    ) * t1.bpp_ as qx_d
from insurance_dw.prem_src2 t1;
使用spark程序读取SQL脚本执行
python复制代码
import os
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
# Point PySpark at the Spark installation and at the Python interpreters to use.
# The driver-side variable is spelled PYSPARK_DRIVER_PYTHON; a misspelt key is just an
# unrelated environment variable and has no effect.
os.environ['SPARK_HOME'] = '/export/server/spark'
os.environ['PYSPARK_PYTHON'] = '/root/anaconda3/bin/python3'
os.environ['PYSPARK_DRIVER_PYTHON'] = '/root/anaconda3/bin/python3'
# 工具函数(方法) :
def executeSQLFile(filename):
    """Run every statement of a SQL script through the module-level ``spark`` session.

    The script is read from ``../sparksql_script/<filename>`` and split on ``;``.
    Each non-blank piece is printed; a piece is executed with ``spark.sql`` only when it
    contains at least one line that is neither blank nor a ``--`` comment. When the
    first such line starts with ``select`` (case-insensitive), up to 100 result rows
    are shown.

    Note: the split is purely textual, so a ``;`` inside a comment or a string
    literal also ends a statement.

    :param filename: name of the script inside ``../sparksql_script/``.
    """
    # The scripts contain Chinese comments: decode explicitly as UTF-8 instead of
    # relying on the platform default encoding.
    with open(r'../sparksql_script/' + filename, 'r', encoding='utf-8') as f:
        read_data = f.read()
    # Each element is expected to be one complete statement; strip() is empty for
    # both '' and whitespace-only text, so one check drops both.
    statements = [stmt for stmt in read_data.split(";") if stmt.strip()]
    for sql in statements:
        # Print the full statement first.
        print(sql, ";")
        # A piece may consist of comments only (it still ends with ';'); keep just the
        # lines that are real SQL to decide whether there is anything to execute.
        effective = [
            line for line in sql.splitlines()
            if line.strip() and not line.lstrip().startswith("--")
        ]
        if effective:
            df = spark.sql(sql)
            # Show the data for queries, regardless of keyword case.
            if effective[0].lstrip().lower().startswith("select"):
                df.show(100)
if __name__ == '__main__':
    print("保险项目的spark程序的入口:")
    # 1- Build the SparkSession with Hive integration enabled.
    spark = (
        SparkSession.builder
        .master("local[*]")
        .appName("insurance_main")
        .config("spark.sql.shuffle.partitions", 4)
        .config("spark.sql.warehouse.dir", "hdfs://node1:8020/user/hive/warehouse")
        .config("hive.metastore.uris", "thrift://node1:9083")
        .enableHiveSupport()
        .getOrCreate()
    )
    # 2- Execute the SQL script statement by statement.
    executeSQLFile('04_insurance_dw_prem_std.sql')
4.3.4 完成步骤四
python复制代码
# 定义lx的函数 udaf_lx
@F.pandas_udf('decimal(17,12)')
def udaf_lx(lx: pd.Series, qx: pd.Series) -> decimal.Decimal:
    """Aggregate UDF returning the recursively computed lx of the last input row.

    The first row seeds the recursion with ``lx[0]``; every later row applies
    ``lx_i = lx_(i-1) * (1 - qx_(i-1))``, rounded to 12 decimal places.

    :param lx: lx values of the rows handed to the aggregate; only the first is read.
    :param qx: qx values of the same rows.
    :return: lx of the last row (``Decimal(0)`` when the input is empty).
    """
    tmp_lx = decimal.Decimal(0)
    tmp_qx = decimal.Decimal(0)
    for i in range(len(lx)):
        if i == 0:
            tmp_lx = decimal.Decimal(lx[0])
            tmp_qx = decimal.Decimal(qx[0])
        else:
            tmp_lx = (tmp_lx * (1 - tmp_qx)).quantize(decimal.Decimal('0.000000000000'))
            # Carry the CURRENT row's qx into the next iteration. Reloading qx[0]
            # here would apply the first row's rate to every later row.
            tmp_qx = decimal.Decimal(qx[i])
    return tmp_lx
# Expose the UDF to SQL under the name udaf_lx.
spark.udf.register('udaf_lx', udaf_lx)
sql复制代码
--步骤四:lx
create or replace view insurance_dw.prem_src4_1 as
select
t1.*,
if(policy_year = 1,1,null) as lx
from insurance_dw.prem_src3 t1;
--步骤4_2:lx
-- 通过对 ppp(缴费期)sex age_buy(投保年龄) 分组,即可将每组中对应的保单年度放置在一组内,进行计算操作
-- Step 4_2: compute lx per (ppp, sex, age_buy) group, ordered by policy year.
-- Columns are listed explicitly instead of `*`: prem_src4_1 already has a seed column
-- named lx, so `select *, ... as lx` would produce two columns called lx.
-- The table is named prem_src4_2 because step 5_1 selects from insurance_dw.prem_src4_2.
drop table if exists insurance_dw.prem_src4_2;
create table if not exists insurance_dw.prem_src4_2 as
select
    age_buy,
    Nursing_Age,
    sex,
    t_age,
    ppp,
    bpp,
    interest_rate,
    sa,
    policy_year,
    age,
    ppp_,
    bpp_,
    qx,
    kx,
    qx_ci,
    qx_d,
    udaf_lx(lx, qx) over (partition by ppp,sex,age_buy order by policy_year) as lx
from insurance_dw.prem_src4_1;
--步骤五:lx_d dx_d dx_ci
--5_1
create or replace view insurance_dw.prem_src5_1 as
select *,
if(policy_year = 1, 1, null) as lx_d,
if(policy_year = 1, qx_d, null) as dx_d,
if(policy_year = 1, qx_ci, null) as dx_ci
from insurance_dw.prem_src4_2;
--5_2
drop table if exists insurance_dw.prem_src5_2;
create table if not exists insurance_dw.prem_src5_2 as
select age_buy,
Nursing_Age,
sex,
t_age,
ppp,
bpp,
interest_rate,
sa,
policy_year,
age,
ppp_,
bpp_,
qx,
kx,
qx_ci,
qx_d,
lx,
udaf_3col(lx_d, qx_d, qx_ci) over (partition by ppp,sex,age_buy order by policy_year) as 3col
from insurance_dw.prem_src5_1;
-- 5_3最后:将三列合并数据切割开,形成三列结果数据
-- Step 5_3: split the comma-joined string column `3col` of prem_src5_2 back into three
-- decimal columns. udaf_3col emits the values in the order lx_d,dx_d,dx_ci, so the
-- three columns read elements 0, 1 and 2 respectively (not element 0 three times).
create or replace view insurance_dw.prem_src5_3 as
select
    age_buy,
    Nursing_Age,
    sex,
    t_age,
    ppp,
    bpp,
    interest_rate,
    sa,
    policy_year,
    age,
    ppp_,
    bpp_,
    qx,
    kx,
    qx_ci,
    qx_d,
    lx,
    cast(split(3col, ',')[0] as decimal(17,12)) as lx_d,
    cast(split(3col, ',')[1] as decimal(17,12)) as dx_d,
    cast(split(3col, ',')[2] as decimal(17,12)) as dx_ci
from insurance_dw.prem_src5_2;
4.3.6 完成步骤六
sql复制代码
--步骤六:cx
-- pow()幂次方计算
-- pow((1+interest_rate),(age+1)) => (1+interest_rate)^(age+1)
create or replace view insurance_dw.prem_src6 as
select
*,
dx_d / pow((1+interest_rate),(age+1) ) as cx
from insurance_dw.prem_src5_3;
4.3.7 完成步骤七
sql复制代码
--步骤七:cx_ ci_cx
create or replace view insurance_dw.prem_src7 as
select
*,
cx * pow((1+interest_rate),0.5) as cx_,
dx_ci / pow((1+interest_rate),(age+1)) as ci_cx
from insurance_dw.prem_src6;
4.3.8 完成步骤八
sql复制代码
--步骤八:ci_cx_ dx dx_d_
create or replace view insurance_dw.prem_src8 as
select
*,
ci_cx * pow((1+interest_rate),0.5) as ci_cx_,
lx / pow((1+interest_rate),age) as dx,
lx_d / pow((1+interest_rate),age) as dx_d_
from insurance_dw.prem_src7;
4.3.9 完成步骤九
sql复制代码
--步骤九:expense DB1 db2_factor
create or replace view insurance_dw.prem_src9 as
select t1.*,
(
case
when t1.policy_year = 1 then t2.r1
when t1.policy_year = 2 then t2.r2
when t1.policy_year = 3 then t2.r3
when t1.policy_year = 4 then t2.r4
when t1.policy_year = 5 then t2.r5
else t2.r6_
end
) * t1.ppp_ as expense,
t3.Disability_Ratio * t1.bpp_ as db1,
(
if(t1.age < t1.Nursing_Age, 1, 0)
) * t3.Nursing_Ratio as db2_factor
from insurance_dw.prem_src8 t1
join insurance_ods.pre_add_exp_ratio t2 on t1.ppp = t2.PPP
join insurance_dw.input t3 on 1 = 1;
4.3.10 完成步骤十
sql复制代码
--步骤十:db2 db3 db4 db5
-- least() 即在多列中取最小值
--least() => if( t1.ppp > t1.policy_year, t1.policy_year ,t1.ppp)
create or replace view insurance_dw.prem_src10 as
select t1.*,
(
sum(t1.dx * t1.db2_factor)
over (partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
) / t1.dx as db2,
(
if(t1.age >= t1.Nursing_Age, 1, 0)
) * t2.Nursing_Ratio as db3,
least(t1.ppp, t1.policy_year) as db4,
(
ifnull(sum(t1.dx * t1.ppp_)
over (partition by ppp,sex,age_buy order by policy_year rows between 1 following and unbounded following),
0)
/ t1.dx
) * pow((1 + t1.interest_rate), 0.5) as db5
from insurance_dw.prem_src9 t1
join insurance_dw.input t2 on 1 = 1;
-- 步骤 11: 计算保费前的中间结果值: 先分组, 然后进行统计
create or replace view insurance_dw.prem_std_src11 as
select
ppp, sex, age_buy,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * db1 * pow((1 + interest_rate), -0.25),
ci_cx_ * db1)
) as t11,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * db2 * pow((1 + interest_rate), -0.25),
ci_cx_ * db2)
) as v11,
sum(dx * db3) as w11,
sum(dx * ppp_) as q11,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * pow((1 + interest_rate), 0.25), 0)
) as t9,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * pow((1 + interest_rate), 0.25), 0)
) as v9,
sum(dx * expense) as s11,
sum(cx_ * db4) as x11,
sum(ci_cx_ * db5) as y11
from insurance_dw.prem_src
group by ppp, sex, age_buy;
4.4.2 完成步骤十二
sql复制代码
-- 步骤十二: 核算保费:
create or replace view insurance_dw.prem_std_src12 as
select
t1.age_buy,
t1.sex,
t1.ppp,
106-t1.age_buy as bpp,
input.sa * (t1.t11 + t1.v11 + t1.w11) / (t1.q11 -t1.t9 - t1.v9 -t1.s11 - t1.x11 - t1.y11) as prem
from insurance_dw.prem_std_src11 t1 join insurance_dw.input on 1=1 ;
-- 校验:
select * from insurance_dw.prem_std_src12 where age_buy = 50 and sex = 'M' and ppp = 20;
4.4.3 将保费信息数据保存目标表
sql复制代码
insert overwrite table insurance_dw.prem_std
select
age_buy,
sex,
ppp,
bpp,
prem
from insurance_dw.prem_std_src12;
-- 校验数据
select count(1) from insurance_dw.prem_std;
select * from insurance_dw.prem_std where age_buy = 50 and sex = 'M' and ppp = 20;
4.5 示例代码
insurance_main.py
python复制代码
import decimal
import os
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
import pandas as pd
import pyspark.sql.functions as F
# Point PySpark at the Spark installation and at the Python interpreters to use.
# The driver-side variable is spelled PYSPARK_DRIVER_PYTHON; a misspelt key is just an
# unrelated environment variable and has no effect.
os.environ['SPARK_HOME'] = '/export/server/spark'
os.environ['PYSPARK_PYTHON'] = '/root/anaconda3/bin/python3'
os.environ['PYSPARK_DRIVER_PYTHON'] = '/root/anaconda3/bin/python3'
# 工具函数(方法) :
def executeSQLFile(filename):
    """Run every statement of a SQL script through the module-level ``spark`` session.

    The script is read from ``../sparksql_script/<filename>`` and split on ``;``.
    Each non-blank piece is printed; a piece is executed with ``spark.sql`` only when it
    contains at least one line that is neither blank nor a ``--`` comment. When the
    first such line starts with ``select`` (case-insensitive), up to 100 result rows
    are shown.

    Note: the split is purely textual, so a ``;`` inside a comment or a string
    literal also ends a statement.

    :param filename: name of the script inside ``../sparksql_script/``.
    """
    # The scripts contain Chinese comments: decode explicitly as UTF-8 instead of
    # relying on the platform default encoding.
    with open(r'../sparksql_script/' + filename, 'r', encoding='utf-8') as f:
        read_data = f.read()
    # Each element is expected to be one complete statement; strip() is empty for
    # both '' and whitespace-only text, so one check drops both.
    statements = [stmt for stmt in read_data.split(";") if stmt.strip()]
    for sql in statements:
        # Print the full statement first.
        print(sql, ";")
        # A piece may consist of comments only (it still ends with ';'); keep just the
        # lines that are real SQL to decide whether there is anything to execute.
        effective = [
            line for line in sql.splitlines()
            if line.strip() and not line.lstrip().startswith("--")
        ]
        if effective:
            df = spark.sql(sql)
            # Show the data for queries, regardless of keyword case.
            if effective[0].lstrip().lower().startswith("select"):
                df.show(100)
if __name__ == '__main__':
    print("保险项目的spark程序的入口:")
    # 1- Create the SparkSession with Hive integration enabled.
    spark = SparkSession \
        .builder \
        .master("local[*]") \
        .appName("insurance_main") \
        .config("spark.sql.shuffle.partitions", 4) \
        .config("spark.sql.warehouse.dir", "hdfs://node1:8020/user/hive/warehouse") \
        .config("hive.metastore.uris", "thrift://node1:9083") \
        .enableHiveSupport() \
        .getOrCreate()

    # Aggregate UDF for lx.
    @F.pandas_udf('decimal(17,12)')
    def udaf_lx(lx: pd.Series, qx: pd.Series) -> decimal.Decimal:
        """Return the recursively computed lx of the last input row.

        The first row seeds the recursion with ``lx[0]``; every later row applies
        ``lx_i = lx_(i-1) * (1 - qx_(i-1))``, rounded to 12 decimal places.
        """
        tmp_lx = decimal.Decimal(0)
        tmp_qx = decimal.Decimal(0)
        for i in range(len(lx)):
            if i == 0:
                tmp_lx = decimal.Decimal(lx[0])
                tmp_qx = decimal.Decimal(qx[0])
            else:
                tmp_lx = (tmp_lx * (1 - tmp_qx)).quantize(decimal.Decimal('0.000000000000'))
                # Carry the CURRENT row's qx into the next iteration. Reloading qx[0]
                # here would apply the first row's rate to every later row.
                tmp_qx = decimal.Decimal(qx[i])
        return tmp_lx
    spark.udf.register('udaf_lx', udaf_lx)

    # Aggregate UDF for lx_d, dx_d and dx_ci (three values returned as one string).
    @F.pandas_udf('string')
    def udaf_3col(lx_d: pd.Series, qx_d: pd.Series, qx_ci: pd.Series) -> str:
        """Return ``"lx_d,dx_d,dx_ci"`` of the last input row as a comma-joined string.

        The first row seeds the three values from ``lx_d[0]``, ``qx_d[0]`` and
        ``qx_ci[0]``. Every later row applies, each rounded to 12 decimal places:
        ``lx_d = lx_d - dx_d - dx_ci`` (previous row's values), then
        ``dx_d = lx_d * qx_d[i]`` and ``dx_ci = lx_d * qx_ci[i]``.
        """
        tmp_lx_d = decimal.Decimal(0)
        tmp_dx_d = decimal.Decimal(0)
        tmp_dx_ci = decimal.Decimal(0)
        for i in range(0, len(lx_d)):
            if i == 0:
                tmp_lx_d = decimal.Decimal(lx_d[i])
                tmp_dx_d = decimal.Decimal(qx_d[i])
                tmp_dx_ci = decimal.Decimal(qx_ci[i])
            else:
                tmp_lx_d = (tmp_lx_d - tmp_dx_d - tmp_dx_ci).quantize(decimal.Decimal('0.000000000000'))
                tmp_dx_d = (tmp_lx_d * qx_d[i]).quantize(decimal.Decimal('0.000000000000'))
                tmp_dx_ci = (tmp_lx_d * qx_ci[i]).quantize(decimal.Decimal('0.000000000000'))
        return str(tmp_lx_d) + ',' + str(tmp_dx_d) + ',' + str(tmp_dx_ci)
    spark.udf.register('udaf_3col', udaf_3col)

    # 2- Execute the SQL script; it calls both UDFs registered above.
    executeSQLFile('04_insurance_dw_prem_std.sql')
04_insurance_dw_prem_std.sql
sql复制代码
-- Enable Spark decimal precision protection (do not allow precision loss in decimal arithmetic).
-- The key is spelled decimalOperations with the letter O; with a digit 0 the statement
-- sets an unrelated key and the protection stays off.
set spark.sql.decimalOperations.allowPrecisionLoss=false;
-- 此脚本用于计算保费信息:
-- 1- 先生成维度表信息(19338种)
-- 性别:
create or replace view insurance_dw.prem_src0_sex as
select stack(2, 'M', 'F') as sex;
-- 缴费期: 10 15 20 30
create or replace view insurance_dw.prem_src0_ppp as
select stack(4, 10, 15, 20, 30) as ppp;
-- 投保年龄: 18~60
create or replace view insurance_dw.prem_src0_age_buy as
select explode(sequence(18, 60)) as age_buy;
-- 保单年度:
create or replace view insurance_dw.prem_src0_policy_year as
select explode(sequence(1, 88)) as policy_year;
-- 构建一个常量标准数据表:
create or replace view insurance_dw.input as
select 0.035 interest_rate, --预定利息率(Interest Rate PREM&RSV)
0.055 interest_rate_cv,--现金价值预定利息率(Interest Rate CV)
0.0004 acci_qx,--意外身故死亡发生率(Accident_qx)
0.115 rdr,--风险贴现率(Risk Discount Rate)
10000 sa,--基本保险金额(Baisc Sum Assured)
1 average_size,--平均规模(Average Size)
1 MortRatio_Prem_0,--Mort Ratio(PREM)
1 MortRatio_RSV_0,--Mort Ratio(RSV)
1 MortRatio_CV_0,--Mort Ratio(CV)
1 CI_RATIO,--CI Ratio
6 B_time1_B,--生存金给付时间(1)---begain
59 B_time1_T,--生存金给付时间(1)-terminate
0.1 B_ratio_1,--生存金给付比例(1)
60 B_time2_B,--生存金给付时间(2)-begain
106 B_time2_T,--生存金给付时间(2)-terminate
0.1 B_ratio_2,--生存金给付比例(2)
70 MB_TIME,--祝寿金给付时间
0.2 MB_Ration,--祝寿金给付比例
0.7 RB_Per,--可分配盈余分配给客户的比例
0.7 TB_Per,--未分配盈余分配给客户的比例
1 Disability_Ratio,--残疾给付保险金保额倍数
0.1 Nursing_Ratio,--长期护理保险金保额倍数
75 Nursing_Age--长期护理保险金给付期满年龄
;
-- 组装四个维度:
create or replace view insurance_dw.prem_src0 as
select t3.age_buy,
t5.Nursing_Age,
t1.sex,
t5.B_time2_T as t_age,
t2.ppp,
t5.B_time2_T - t3.age_buy as bpp,
t5.interest_rate,
t5.sa,
t4.policy_year,
(t3.age_buy + t4.policy_year) - 1 as age
from insurance_dw.prem_src0_sex t1
join insurance_dw.prem_src0_ppp t2 on 1 = 1
join insurance_dw.prem_src0_age_buy t3 on t3.age_buy >= 18 and t3.age_buy <= 70 - t2.ppp
join insurance_dw.prem_src0_policy_year t4 on t4.policy_year >= 1 and t4.policy_year <= 106 - t3.age_buy
join insurance_dw.input as t5 on 1 = 1;
-- 校验维度表
select count(1)
from insurance_dw.prem_src0;
--====================================计算指标===================================================
-- 步骤一:计算 ppp_ 和 bpp_
create or replace view insurance_dw.prem_src1 as
select *,
if(policy_year <= ppp, 1, 0) as ppp_,
if(policy_year <= bpp, 1, 0) as bpp_
from insurance_dw.prem_src0;
--校验
select *
from insurance_dw.prem_src1
where age_buy = 45
and sex = 'F'
and ppp = 15;
--步骤二:qx kx 和 qx_ci
create or replace view insurance_dw.prem_src2 as
select t1.*,
cast((
if(
t1.age <= 105,
if(t1.sex = 'M', t3.cl1, t3.cl3),
0)
) * t2.MortRatio_Prem_0 * t1.bpp_ as decimal(17, 12)) as qx,
(
if(
t1.age <= 105,
if(t1.sex = 'M', t4.k_male, t4.k_female),
0)
) * t1.bpp_ as kx,
(
if(
t1.sex = 'M',
t4.male,
t4.female)
) * t1.bpp_ as qx_ci
from insurance_dw.prem_src1 t1
join insurance_dw.input t2 on 1 = 1
join insurance_ods.mort_10_13 t3 on t1.age = t3.age
join insurance_ods.dd_table t4 on t1.age = t4.age;
--校验
select *
from insurance_dw.prem_src2
where age_buy = 45
and sex = 'F'
and ppp = 15;
--步骤三:qx_d
create or replace view insurance_dw.prem_src3 as
select t1.*,
cast(
(
if(t1.age >= 105, t1.qx - t1.qx_ci, cast(t1.qx * (1 - t1.kx) as decimal(17, 12)))
) * t1.bpp_
as decimal(17, 12)) as qx_d
from insurance_dw.prem_src2 t1;
--校验
select *
from insurance_dw.prem_src3
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤四:lx
create or replace view insurance_dw.prem_src4_1 as
select t1.*,
if(policy_year = 1, 1, null) as lx
from insurance_dw.prem_src3 t1;
--步骤4_2:lx
-- 通过对 ppp(缴费期)sex age_buy(投保年龄) 分组,即可将每组中对应的保单年度放置在一组内,进行计算操作
drop table if exists insurance_dw.prem_src4_2;
create table if not exists insurance_dw.prem_src4_2 as
select age_buy,
Nursing_Age,
sex,
t_age,
ppp,
bpp,
interest_rate,
sa,
policy_year,
age,
ppp_,
bpp_,
qx,
kx,
qx_ci,
qx_d,
udaf_lx(lx, qx) over (partition by ppp,sex,age_buy order by policy_year) as lx
from insurance_dw.prem_src4_1;
--校验
select *
from insurance_dw.prem_src4_2
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤五:lx_d dx_d dx_ci
--5_1
create or replace view insurance_dw.prem_src5_1 as
select *,
if(policy_year = 1, 1, null) as lx_d,
if(policy_year = 1, qx_d, null) as dx_d,
if(policy_year = 1, qx_ci, null) as dx_ci
from insurance_dw.prem_src4_2;
--5_2
drop table if exists insurance_dw.prem_src5_2;
create table if not exists insurance_dw.prem_src5_2 as
select age_buy,
Nursing_Age,
sex,
t_age,
ppp,
bpp,
interest_rate,
sa,
policy_year,
age,
ppp_,
bpp_,
qx,
kx,
qx_ci,
qx_d,
lx,
udaf_3col(lx_d, qx_d, qx_ci) over (partition by ppp,sex,age_buy order by policy_year) as 3col
from insurance_dw.prem_src5_1;
-- 5_3最后:将三列合并数据切割开,形成三列结果数据
create or replace view insurance_dw.prem_src5_3 as
select age_buy,
Nursing_Age,
sex,
t_age,
ppp,
bpp,
interest_rate,
sa,
policy_year,
age,
ppp_,
bpp_,
qx,
kx,
qx_ci,
qx_d,
lx,
cast(split(3col, ',')[0] as decimal(17, 12)) as lx_d,
cast(split(3col, ',')[1] as decimal(17, 12)) as dx_d,
cast(split(3col, ',')[2] as decimal(17, 12)) as dx_ci
from insurance_dw.prem_src5_2;
--校验
select *
from insurance_dw.prem_src5_3
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤六:cx
-- pow()幂次方计算
-- pow((1+interest_rate),(age+1)) => (1+interest_rate)^(age+1)
create or replace view insurance_dw.prem_src6 as
select *,
dx_d / pow((1 + interest_rate), (age + 1)) as cx
from insurance_dw.prem_src5_3;
--校验
select *
from insurance_dw.prem_src6
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤七:cx_ ci_cx
create or replace view insurance_dw.prem_src7 as
select *,
cx * pow((1 + interest_rate), 0.5) as cx_,
dx_ci / pow((1 + interest_rate), (age + 1)) as ci_cx
from insurance_dw.prem_src6;
--校验
select *
from insurance_dw.prem_src7
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤八:ci_cx_ dx dx_d_
create or replace view insurance_dw.prem_src8 as
select *,
ci_cx * pow((1 + interest_rate), 0.5) as ci_cx_,
lx / pow((1 + interest_rate), age) as dx,
lx_d / pow((1 + interest_rate), age) as dx_d_
from insurance_dw.prem_src7;
--校验
select *
from insurance_dw.prem_src8
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤九:expense DB1 db2_factor
create or replace view insurance_dw.prem_src9 as
select t1.*,
(
case
when t1.policy_year = 1 then t2.r1
when t1.policy_year = 2 then t2.r2
when t1.policy_year = 3 then t2.r3
when t1.policy_year = 4 then t2.r4
when t1.policy_year = 5 then t2.r5
else t2.r6_
end
) * t1.ppp_ as expense,
t3.Disability_Ratio * t1.bpp_ as db1,
(
if(t1.age < t1.Nursing_Age, 1, 0)
) * t3.Nursing_Ratio as db2_factor
from insurance_dw.prem_src8 t1
join insurance_ods.pre_add_exp_ratio t2 on t1.ppp = t2.PPP
join insurance_dw.input t3 on 1 = 1;
--校验
select *
from insurance_dw.prem_src9
where age_buy = 25
and sex = 'M'
and ppp = 20;
--步骤十:db2 db3 db4 db5
-- least() 即在多列中取最小值
--least() => if( t1.ppp > t1.policy_year, t1.policy_year ,t1.ppp)
create or replace view insurance_dw.prem_src10 as
select t1.*,
(
sum(t1.dx * t1.db2_factor)
over (partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
) / t1.dx as db2,
(
if(t1.age >= t1.Nursing_Age, 1, 0)
) * t2.Nursing_Ratio as db3,
least(t1.ppp, t1.policy_year) as db4,
(
ifnull(sum(t1.dx * t1.ppp_)
over (partition by ppp,sex,age_buy order by policy_year rows between 1 following and unbounded following),
0)
/ t1.dx
) * pow((1 + t1.interest_rate), 0.5) as db5
from insurance_dw.prem_src9 t1
join insurance_dw.input t2 on 1 = 1;
--校验
select *
from insurance_dw.prem_src10
where age_buy = 25
and sex = 'M'
and ppp = 20;
--将保费参数因子的数据灌入到目标表
insert overwrite table insurance_dw.prem_src
select age_buy,
nursing_age,
sex,
t_age,
ppp,
bpp,
interest_rate,
sa,
policy_year,
age,
qx,
kx,
qx_d,
qx_ci,
dx_d,
dx_ci,
lx,
lx_d,
cx,
cx_,
ci_cx,
ci_cx_,
dx,
dx_d_,
ppp_,
bpp_,
expense,
db1,
db2_factor,
db2,
db3,
db4,
db5
from insurance_dw.prem_src10;
select count(1)
from insurance_dw.prem_src;
-- 步骤 11: 计算保费前的中间结果值: 先分组, 然后进行统计
create or replace view insurance_dw.prem_std_src11 as
select
ppp, sex, age_buy,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * db1 * pow((1 + interest_rate), -0.25),
ci_cx_ * db1)
) as t11,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * db2 * pow((1 + interest_rate), -0.25),
ci_cx_ * db2)
) as v11,
sum(dx * db3) as w11,
sum(dx * ppp_) as q11,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * pow((1 + interest_rate), 0.25), 0)
) as t9,
sum(
if(policy_year = 1, 0.5 * ci_cx_ * pow((1 + interest_rate), 0.25), 0)
) as v9,
sum(dx * expense) as s11,
sum(cx_ * db4) as x11,
sum(ci_cx_ * db5) as y11
from insurance_dw.prem_src
group by ppp, sex, age_buy;
select *
from insurance_dw.prem_std_src11
where age_buy = 25
and sex = 'M'
and ppp = 20;
-- 步骤十二: 核算保费:
create or replace view insurance_dw.prem_std_src12 as
select
t1.age_buy,
t1.sex,
t1.ppp,
106-t1.age_buy as bpp,
input.sa * (t1.t11 + t1.v11 + t1.w11) / (t1.q11 -t1.t9 - t1.v9 -t1.s11 - t1.x11 - t1.y11) as prem
from insurance_dw.prem_std_src11 t1 join insurance_dw.input on 1=1 ;
-- 校验:
select * from insurance_dw.prem_std_src12 where age_buy = 50 and sex = 'M' and ppp = 20;
-- 保存到目标表
insert overwrite table insurance_dw.prem_std
select
age_buy,
sex,
ppp,
bpp,
prem
from insurance_dw.prem_std_src12;
-- 校验数据
select count(1) from insurance_dw.prem_std;
select * from insurance_dw.prem_std where age_buy = 50 and sex = 'M' and ppp = 20;
drop table if exists insurance_dw.prem_cv;
create table if not exists insurance_dw.prem_cv
(
age_buy smallint comment '年投保龄',
sex string comment '性别',
ppp smallint comment '缴费期间',
prem_cv decimal(15, 7) comment '保单价值准备金毛保险费(Preuim)'
)comment '保单价值准备金毛保险费表' row format delimited
fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_dw.db/prem_cv';
2.1 步骤13~16: 价值准备金基础指标
步骤13:
sql复制代码
-- 计算现金价值准备金表:
-- 1- 读取保费参数因子表, 从当中把所有维度以及不需要计算的指标, 全部提取出来
create or replace view insurance_dw.cv_src13 as
select
t1.age_buy,
t1.Nursing_Age,
t1.sex,
t1.t_age,
t1.ppp,
t1.bpp,
t2.interest_rate_cv ,
t1.sa,
t1.policy_year,
t1.age,
t1.qx,
t1.kx,
t1.qx_d,
t1.qx_ci,
t1.dx_d,
t1.dx_ci,
t1.lx,
t1.lx_d,
t1.dx_d / pow((1+ t2.interest_rate_cv),(age+1)) as cx,
t1.ppp_,
t1.bpp_,
t1.expense,
t1.db1,
t1.db2_factor,
t1.db3,
t1.db4
from insurance_dw.prem_src10 t1
join insurance_dw.input as t2 on 1 = 1
union all
select distinct
t1.age_buy,
t1.Nursing_Age,
t1.sex,
t1.t_age,
t1.ppp,
t1.bpp,
t2.interest_rate_cv,
t1.sa,
0 as policy_year,
NULL AS age,
NULL AS qx,
NULL AS kx,
NULL AS qx_d,
NULL AS qx_ci,
NULL AS dx_d,
NULL AS dx_ci,
NULL AS lx,
NULL AS lx_d,
NULL AS cx,
NULL AS ppp_,
NULL AS bpp_,
NULL AS expense,
NULL AS db1,
NULL AS db2_factor,
NULL AS db3,
NULL AS db4
from insurance_dw.prem_src10 t1 join insurance_dw.input as t2;
-- 校验:
select count(1) from insurance_dw.cv_src13;
select * from insurance_dw.cv_src13 where age_buy = 50 and sex = 'M' and ppp = 20 order by policy_year ;
完整代码: 13~16步骤:
sql复制代码
-- 计算现金价值准备金表:
-- 1- 读取保费参数因子表, 从当中把所有维度以及不需要计算的指标, 全部提取出来
create or replace view insurance_dw.cv_src16 as
with cv_src13 as (
select
t1.age_buy,
t1.Nursing_Age,
t1.sex,
t1.t_age,
t1.ppp,
t1.bpp,
t2.interest_rate_cv ,
t1.sa,
t1.policy_year,
t1.age,
t1.qx,
t1.kx,
t1.qx_d,
t1.qx_ci,
t1.dx_d,
t1.dx_ci,
t1.lx,
t1.lx_d,
t1.dx_d / pow((1+ t2.interest_rate_cv),(age+1)) as cx,
t1.ppp_,
t1.bpp_,
t1.expense,
t1.db1,
t1.db2_factor,
t1.db3,
t1.db4
from insurance_dw.prem_src10 t1
join insurance_dw.input as t2 on 1 = 1
union all
select distinct
t1.age_buy,
t1.Nursing_Age,
t1.sex,
t1.t_age,
t1.ppp,
t1.bpp,
t2.interest_rate_cv,
t1.sa,
0 as policy_year,
NULL AS age,
NULL AS qx,
NULL AS kx,
NULL AS qx_d,
NULL AS qx_ci,
NULL AS dx_d,
NULL AS dx_ci,
NULL AS lx,
NULL AS lx_d,
NULL AS cx,
NULL AS ppp_,
NULL AS bpp_,
NULL AS expense,
NULL AS db1,
NULL AS db2_factor,
NULL AS db3,
NULL AS db4
from insurance_dw.prem_src10 t1 join insurance_dw.input as t2
),
cv_src14 as (
select
*,
cx * pow((1+interest_rate_cv),0.5) as cx_,
dx_ci / pow((1+interest_rate_cv),(age+1)) as ci_cx
from cv_src13
),
cv_src15 as (
select
*,
ci_cx * pow((1+interest_rate_cv),0.5) as ci_cx_,
lx / pow((1+interest_rate_cv),age) as dx,
lx_d / pow((1+interest_rate_cv),age) as dx_d_
from cv_src14
)
select
*,
sum(dx * db2_factor) over (partition by ppp,sex,age_buy order by policy_year rows between current row and unbounded following)
/ dx as db2 ,
(
ifnull(sum(dx * ppp_) over (partition by ppp,sex,age_buy order by policy_year rows between 1 following and unbounded following) ,0)
/ dx
) * pow((1+interest_rate_cv),0.5) as db5
from cv_src15;
2.2 步骤 17~18: 计算现金价值准备金毛保费
sql复制代码
with cv_src17 as (
select
ppp,sex,age_buy,
sum(
if(
policy_year = 1,
0.5 *ci_cx_ * db1 * pow((1+interest_rate_cv),-0.25),
ci_cx_ * db1
)
) as t11,
sum(
if(
policy_year = 1,
0.5 * ci_cx_ * db2 * pow((1+interest_rate_cv),-0.25),
ci_cx_ * db2
)
) as v11,
sum(dx * db3) as w11,
sum(dx * ppp_) as q11,
sum(
if(
policy_year = 1,
0.5 * ci_cx_ * pow((1+interest_rate_cv),0.25),
0
)
) as t9,
sum(
if(
policy_year = 1,
0.5 * ci_cx_ * pow((1+interest_rate_cv),0.25),
0
)
) as v9,
sum(dx * expense) as s11,
sum(cx_ * db4) as x11,
sum(ci_cx_ * db5) as y11
from insurance_dw.cv_src16
group by ppp,sex,age_buy
),
cv_src18 as (
select
t1.ppp,t1.sex,t1.age_buy,
(t2.sa *(t1.t11 + t1.v11 + t1.w11) + t3.prem * (t1.t9 + t1.v9 +t1.x11 +t1.y11)) /(t1.q11 - t1.s11) as prem_cv
from cv_src17 t1 join insurance_dw.input t2 on 1 = 1
join insurance_ods.prem_std_real t3 on t1.ppp = t3.ppp and t1.sex = t3.sex and t1.age_buy = t3.age_buy
)
-- 将毛保费结果保存到结果表中: prem_cv
insert overwrite table insurance_dw.prem_cv
select
age_buy,sex,ppp,prem_cv
from cv_src18;
2.3 步骤19~23: 计算各保单年度现金价值
sql复制代码
-- 步骤19
create or replace view insurance_dw.cv_src23 as
with cv_src19 as (
select
t1.*,
(t1.ppp_ - t1.expense) * t2.prem_cv as np_,
t2.prem_cv * sum(t1.dx *(t1.ppp_ -t1.expense)) over(partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
/ t1.dx as pvnp,
if(
t1.policy_year = 1,
(
t1.sa
*
ifnull(sum(t1.ci_cx_ * t1.db1) over (partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between 1 following and unbounded following) ,0)
+
0.5 *(
(t3.prem * t1.ci_cx_ *pow((1+t1.interest_rate_cv),0.25))
+
(t1.sa * t1.db1 * t1.ci_cx_ * pow((1+t1.interest_rate_cv),-0.25))
)) / t1.dx,
t1.sa
*
sum(t1.ci_cx_ * t1.db1) over(partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
/
t1.dx
) as pvdb1,
if(
t1.policy_year = 1,
(
t1.sa
*
ifnull(sum(t1.ci_cx_ * t1.db2) over (partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between 1 following and unbounded following) ,0)
+
0.5 *(
(t3.prem * t1.ci_cx_ *pow((1+t1.interest_rate_cv),0.25))
+
(t1.sa * t1.db2 * t1.ci_cx_ * pow((1+t1.interest_rate_cv),-0.25))
)) / t1.dx,
t1.sa
*
sum(t1.ci_cx_ * t1.db2) over(partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
/
t1.dx
) as pvdb2,
t1.sa
*
sum(t1.dx * t1.db3) over(partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
/
dx as pvdb3,
t3.prem
*
sum(t1.cx_ * t1.db4) over(partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
/
dx as pvdb4,
t3.prem
*
sum(t1.ci_cx_ * t1.db5) over(partition by t1.ppp,t1.sex,t1.age_buy order by t1.policy_year rows between current row and unbounded following)
/
dx as pvdb5
from insurance_dw.cv_src16 t1 join insurance_dw.prem_cv t2 on t1.ppp = t2.ppp and t1.sex = t2.sex and t1.age_buy = t2.age_buy
join insurance_ods.prem_std_real t3 on t1.ppp = t3.ppp and t1.sex = t3.sex and t1.age_buy = t3.age_buy
),
-- 步骤20:
cv_src20 as (
select
*,
if(
policy_year = 0 ,
null,
lead( (pvdb1 + pvdb2 +pvdb3 +pvdb4 +pvdb5 -pvnp),1,0) over(partition by ppp,sex,age_buy order by policy_year)
) as pvr,
if(
ppp = 1 ,
1,
if(
policy_year >= least(20,ppp) ,
1,
0.8 + policy_year * 0.8 / least(20,ppp)
)
) as rt
from cv_src19
),
-- 步骤21:
cv_src21 as (
select
*,
np_ * lag(rt,1,0) over (partition by ppp,sex,age_buy order by policy_year) as np,
db3 * sa as sur_ben,
rt * greatest( (pvr - lead((db3 * sa)) over (partition by ppp,sex,age_buy order by policy_year)) , 0) as cv_1b
from cv_src20
),
-- 步骤22:
cv_src22 as (
select
*,
cv_1b + lead(sur_ben,1,0) over (partition by ppp,sex,age_buy order by policy_year) as cv_1a
from cv_src21
)
-- 步骤23:
select
*,
(np + lag(cv_1b) over (partition by ppp,sex,age_buy order by policy_year) + cv_1a) /2 as cv_2
from cv_src22;
-- 校验:
select * from insurance_dw.cv_src23 where age_buy = 45 and sex = 'F' and ppp = 10;
drop table if exists insurance_app.app_agg_month_incre_rate;
CREATE TABLE if not exists insurance_app.app_agg_month_incre_rate
(
prem DECIMAL(24, 6) comment '本月保费收入',
last_prem DECIMAL(24, 6) comment '上月保费收入',
prem_incre_rate DECIMAL(6, 4)comment '保费收入增长率'
) partitioned by (month string comment '月份')
comment '保费收入增长率表' row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_incre_rate';
2- 编写SQL:
sql复制代码
-- 计算保费收入增长率
-- 计算当月的保费收入:
-- Premium income growth rate of 2021-04 compared with 2021-03.
-- Premium income of the current month:
with this_month as (
    select
        sum(prem_thismonth) as prem
    from insurance_app.policy_result
    where month = '2021-04'
),
-- Premium income of the previous month:
last_month as (
    select
        sum(prem_thismonth) as last_prem
    from insurance_app.policy_result
    where month = '2021-03'
)
-- The target partition is a known constant, so it is written as a static partition
-- spec: a fully dynamic `partition (month)` insert into a Hive-format table depends on
-- the session running in nonstrict dynamic-partition mode.
insert overwrite table insurance_app.app_agg_month_incre_rate partition (month = '2021-04')
select
    prem,
    last_prem,
    cast((prem - last_prem) / last_prem as decimal(12,4)) as prem_incre_rate
from this_month join last_month on 1 = 1;
3.3 计算首年保费与保费收入比
1、每月计算一次。下月初计算上月的数据。
2、first_of_total_prem= 首年保费收入/保费收入
sql复制代码
-- Recreate the table holding the ratio of first-year premium to total premium
-- income; one partition per month, tab-delimited text at an explicit location.
drop table if exists insurance_app.app_agg_month_first_of_total_prem;

create table if not exists insurance_app.app_agg_month_first_of_total_prem (
    first_prem          decimal(24, 6),
    total_prem          decimal(24, 6),
    first_of_total_prem decimal(8, 6)
)
partitioned by (month string comment '月份')
comment '首年保费与保费收入比表'
row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_first_of_total_prem';
sql复制代码
-- Ratio of first-year premium to total premium income for 2021-04.
--   first-year premium = sum of prem_std over all policies
--   total premium      = sum of (prem_std * number of years paid)
-- Number of years paid is the smallest of:
--   1) the payment period (ppp), when policy year >  payment period
--   2) the policy year,          when policy year <= payment period
--   3) the policy year reached at the surrender date, for surrendered policies
--
-- The month predicate lives in WHERE, not in the LEFT JOIN's ON clause: a
-- predicate on the preserved (left) side inside ON only decides whether a
-- surrender row matches and never removes policy_result rows, which would let
-- every month's snapshot flow into the 2021-04 partition.
with paid_years as (
    select
        pr.prem_std,
        -- NOTE(review): policies without a surrender row make the third
        -- argument NULL; this relies on Spark SQL's least() skipping NULLs
        -- (Hive's least() returns NULL instead) - confirm the target engine.
        least(
            pr.policy_year,
            cast(pr.ppp as int),
            floor(months_between(sur.elapse_date, pr.buy_datetime) / 12) + 1
        ) as ppp_year
    from insurance_app.policy_result as pr
    left join insurance_ods.policy_surrender as sur
        on pr.pol_no = sur.pol_no
    where pr.month = '2021-04'
)
insert overwrite table insurance_app.app_agg_month_first_of_total_prem partition (month='2021-04')
select
    sum(prem_std) as first_prem,
    sum(prem_std * ppp_year) as total_prem,
    sum(prem_std) / sum(prem_std * ppp_year) as first_of_total_prem
from paid_years;
3.4 个人营销渠道的件均保费
1、每月计算一次。下月初计算上月的数据。
2、个人营销渠道的件均保费 premium per policy of individual marketing channel
个人营销渠道的件均保费=(本月的)个人营销渠道的首年原保费收入÷(本月的)个人营销渠道的新单件数
解释:个人营销渠道的件均保费是指个人营销渠道的首年原保费收入与新单件数的比值。
复制代码
通俗地说:
用当月新单的总保费除以当月新单的件数,得到平均每张保单的保费。
sql复制代码
-- Recreate the per-product average-premium-per-policy table; one partition
-- per month, tab-delimited text at an explicit HDFS location.
drop table if exists insurance_app.app_agg_month_premperpol;

create table if not exists insurance_app.app_agg_month_premperpol (
    insur_code   string         comment '保险代码',
    insur_name   string         comment '保险名称',
    prem_per_pol decimal(38, 2) comment '个人营销渠道的件均保费'
)
partitioned by (month string comment '月份')
comment '个人营销渠道的件均保费'
row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_premperpol';
sql复制代码
-- Average premium per policy, per product, for 2021-04: the month's premium
-- total divided by the number of policies whose premium this month is positive.
insert overwrite table insurance_app.app_agg_month_premperpol partition (month)
select
    insur_code,
    insur_name,
    cast(
        sum(prem_thismonth)
        / count(case when prem_thismonth > 0 then pol_no end)
        as decimal(38,2)
    ) as prem_per_pol,
    '2021-04' as month
from insurance_app.policy_result
where month = '2021-04'
group by
    insur_code,
    insur_name;
3.5 死亡发生率和残疾发生率
sql复制代码
-- Recreate the mortality / disability incidence-rate table; one partition per
-- month, tab-delimited text at an explicit HDFS location.
drop table if exists insurance_app.app_agg_month_mort_dis_rate;

create table if not exists insurance_app.app_agg_month_mort_dis_rate (
    insur_code string comment '保险代码',
    insur_name string comment '保险名称',
    age        int,
    sg_rate    decimal(8,6),
    sc_rate    decimal(8,6)
)
partitioned by (month string comment '月份')
comment '死亡发生率和残疾发生率表'
row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_mort_dis_rate';
sql复制代码
-- Mortality and disability incidence rates as of month-end 2021-04.
--   mortality rate  = policies with a death claim so far      / all in-force policies
--   disability rate = policies with a disability claim so far / all in-force policies
-- Rows are grouped one year of age per group, using the age at which the claim
-- actually occurred (age_buy plus whole years from purchase to claim date),
-- not the age at purchase.
--
-- policy_result holds one snapshot per month, so it is restricted to the
-- 2021-04 snapshot like every other monthly metric; without the filter each
-- policy would be counted once per stored month.
with claims_by_age as (
    select
        pr.insur_code,
        pr.insur_name,
        -- NULL for policies that have no claim row (left join miss).
        floor(months_between(ci.claim_date, pr.buy_datetime) / 12) + pr.age_buy as age,
        -- presumably 'sg%' items are death claims and 'sc%' items are
        -- disability claims, matching sg_rate / sc_rate - verify the item codes.
        count(case when ci.claim_item like 'sg%' then ci.pol_no end) as sg_cnt,
        count(case when ci.claim_item like 'sc%' then ci.pol_no end) as sc_cnt,
        count(pr.pol_no) as total_cnt
    from insurance_app.policy_result as pr
    left join insurance_ods.claim_info as ci
        on pr.pol_no = ci.pol_no
    where pr.month = '2021-04'
    group by
        pr.insur_name,
        pr.insur_code,
        floor(months_between(ci.claim_date, pr.buy_datetime) / 12) + pr.age_buy
),
with_product_total as (
    -- Replace the per-age row count with the total across all ages of the same
    -- product, which is the denominator of both rates.
    select
        insur_code,
        insur_name,
        age,
        sg_cnt,
        sc_cnt,
        sum(total_cnt) over (partition by insur_code, insur_name) as total_cnt
    from claims_by_age
)
insert overwrite table insurance_app.app_agg_month_mort_dis_rate partition (month)
select
    insur_code,
    insur_name,
    age,
    sg_cnt / total_cnt as sg_rate,
    sc_cnt / total_cnt as sc_rate,
    '2021-04' as month
from with_product_total;
3.6 新业务价值率
1、每月计算一次。下月初计算上月的数据。
2、新业务价值率(NBEV,New Business Embedded Value)= PV(预期各年利润) / 首年保费收入
3、对单张保单而言,其新业务价值率存放在 prem_std_real 表中。
4、对一个产品的多张保单而言,按期交保费做加权平均,例如:
第1张单,期交保费100元,新业务价值率是10%
第2张单,期交保费是200元,新业务价值率是20%
则新业务价值率 = (100*10% + 200*20%) / (100 + 200) = 50 / 300 = 16.67%
sql复制代码
-- New-business value rate: recreate the result table; one partition per
-- month, tab-delimited text at an explicit HDFS location.
drop table if exists insurance_app.app_agg_month_nbev;

create table if not exists insurance_app.app_agg_month_nbev (
    insur_code string         comment '保险代码',
    insur_name string         comment '保险名称',
    nbev       decimal(38,11) comment '新业务价值率'
)
partitioned by (month string comment '月份')
comment '新业务价值率表'
row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_nbev';
sql复制代码
-- New-business value rate per product for 2021-04: the prem_std-weighted
-- average of the per-policy nbev looked up by (ppp, sex, age_buy).
insert overwrite table insurance_app.app_agg_month_nbev partition(month)
select
    pr.insur_code,
    pr.insur_name,
    sum(pr.prem_std * rate.nbev) / sum(pr.prem_std) as nbev,
    '2021-04' as month
from insurance_app.policy_result as pr
inner join insurance_ods.prem_std_real as rate
    on  pr.ppp = rate.ppp
    and pr.sex = rate.sex
    and pr.age_buy = rate.age_buy
where pr.month = '2021-04'
group by
    pr.insur_code,
    pr.insur_name;
-- Recreate the high-net-worth customer ratio table; one partition per month,
-- tab-delimited text at an explicit HDFS location.
drop table if exists insurance_app.app_agg_month_high_net_rate;

create table if not exists insurance_app.app_agg_month_high_net_rate (
    high_net_rate decimal(8, 6) comment '高净值客户比例'
)
partitioned by (month string comment '月份')
comment '高净值客户比例表'
row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_high_net_rate';
sql复制代码
-- High-net-worth customer ratio for 2021-04: distinct clients with
-- income >= 10,000,000 divided by all distinct clients that appear in the
-- month's policy_result snapshot.
insert overwrite table insurance_app.app_agg_month_high_net_rate partition(month)
select
    count(distinct case when cli.income >= 10000000 then cli.user_id end)
        / count(distinct cli.user_id) as high_net_rate,
    '2021-04' as month
from insurance_ods.policy_client as cli
inner join insurance_app.policy_result as pr
    on cli.user_id = pr.user_id
-- For an inner join, filtering in WHERE is equivalent to filtering in ON.
where pr.month = '2021-04';
-- Recreate the per-region monthly summary table; one partition per month,
-- tab-delimited text at an explicit HDFS location.
drop table if exists insurance_app.app_agg_month_dir;

create table if not exists insurance_app.app_agg_month_dir (
    direction   string        comment '所在区域',
    sum_users   bigint        comment '总投保人数',
    sum_prem    decimal(24)   comment '当月保费汇总',
    sum_cv_1b   decimal(27,2) comment '总现金价值',
    sum_sur_ben decimal(27)   comment '总生存金',
    sum_rsv2_re decimal(27,2) comment '总准备金'
)
partitioned by (month string comment '月份')
comment '各地区的汇总保费表'
row format delimited fields terminated by '\t'
location 'hdfs://node1:8020/user/hive/warehouse/insurance_app.db/app_agg_month_dir';
sql复制代码
-- Per-region summary for 2021-04: distinct policyholders plus the month's
-- totals of premium, cv_1b, sur_ben and rsv2_re.
insert overwrite table insurance_app.app_agg_month_dir partition (month)
select
    direction,
    count(distinct user_id) as sum_users,
    sum(prem_thismonth)     as sum_prem,
    sum(cv_1b)              as sum_cv_1b,
    sum(sur_ben)            as sum_sur_ben,
    sum(rsv2_re)            as sum_rsv2_re,
    '2021-04'               as month
from insurance_app.policy_result
where month = '2021-04'
group by
    direction;