ods层
ODS层的主要工作:根据需求把所需表的数据从MySQL导入到ODS层。为此需要先在Hive中建立ODS层的表;导入时按需求过滤空数据,并在建表时增加一个dt分区字段,用来记录数据导入的日期。
ods层建表语句
sql
1,edu_ods层建立ods_customer_relationship
-- ODS copy of the customer relationship (intention) table, partitioned by load date (dt).
-- IF EXISTS keeps the script re-runnable when the table has not been created yet.
-- NOTE(review): ROW FORMAT DELIMITED is presumably ignored by the ORC SerDe - confirm before relying on it.
DROP TABLE IF EXISTS edu_ods.ods_customer_relationship;
CREATE TABLE edu_ods.ods_customer_relationship (
    `id` bigint,
    `create_date_time` string,
    `update_date_time` string COMMENT '最后更新时间',
    `deleted` bigint COMMENT '是否被删除(禁用)',
    `customer_id` bigint COMMENT '所属客户id',
    `first_id` bigint COMMENT '第一条客户关系id',
    `belonger` bigint COMMENT '归属人',
    `belonger_name` string COMMENT '归属人姓名',
    `initial_belonger` bigint COMMENT '初始归属人',
    `distribution_handler` bigint COMMENT '分配处理人',
    `business_scrm_department_id` bigint COMMENT '归属部门',
    `last_visit_time` string COMMENT '最后回访时间',
    `next_visit_time` string COMMENT '下次回访时间',
    `origin_type` string COMMENT '数据来源',
    `itcast_school_id` bigint COMMENT '校区Id',
    `itcast_subject_id` bigint COMMENT '学科Id',
    `intention_study_type` string COMMENT '意向学习方式',
    `anticipat_signup_date` string COMMENT '预计报名时间',
    `level` string COMMENT '客户级别',
    `creator` bigint COMMENT '创建人',
    `current_creator` bigint COMMENT '当前创建人:初始==创建人,当在公海拉回时为 拉回人',
    `creator_name` string COMMENT '创建者姓名',
    `origin_channel` string COMMENT '来源渠道',
    `comment` string COMMENT '备注',
    `first_customer_clue_id` bigint COMMENT '第一条线索id',
    `last_customer_clue_id` bigint COMMENT '最后一条线索id',
    `process_state` string COMMENT '处理状态',
    `process_time` string COMMENT '处理状态变动时间',
    `payment_state` string COMMENT '支付状态',
    `payment_time` string COMMENT '支付状态变动时间',
    `signup_state` string COMMENT '报名状态',
    `signup_time` string COMMENT '报名时间',
    `notice_state` string COMMENT '通知状态',
    `notice_time` string COMMENT '通知状态变动时间',
    `lock_state` bigint COMMENT '锁定状态',
    `lock_time` string COMMENT '锁定状态修改时间',
    `itcast_clazz_id` bigint COMMENT '所属ems班级id',
    `itcast_clazz_time` string COMMENT '报班时间',
    `payment_url` string COMMENT '付款链接',
    `payment_url_time` string COMMENT '支付链接生成时间',
    `ems_student_id` bigint COMMENT 'ems的学生id',
    `delete_reason` string COMMENT '删除原因',
    `deleter` bigint COMMENT '删除人',
    `deleter_name` string COMMENT '删除人姓名',
    `delete_time` string COMMENT '删除时间',
    `course_id` bigint COMMENT '课程ID',
    `course_name` string COMMENT '课程名称',
    `delete_comment` string COMMENT '删除原因说明',
    `close_state` string COMMENT '关闭装填',
    `close_time` string COMMENT '关闭状态变动时间',
    `appeal_id` bigint COMMENT '申诉id',
    `tenant` bigint COMMENT '租户',
    `total_fee` decimal(19,0) COMMENT '报名费总金额',
    `belonged` bigint COMMENT '小周期归属人',
    `belonged_time` string COMMENT '归属时间',
    `belonger_time` string COMMENT '归属时间',
    `transfer` bigint COMMENT '转移人',
    `transfer_time` string COMMENT '转移时间',
    `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
    `transfer_bxg_oa_account` string COMMENT '转移到博学谷归属人OA账号',
    `transfer_bxg_belonger_name` string COMMENT '转移到博学谷归属人OA姓名'
) COMMENT '客户意向表'
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='ZLIB');
2,edu_ods层建立ods_customer_clue
-- ODS copy of the customer clue (lead) table, partitioned by load date (dt).
-- IF EXISTS keeps the script re-runnable when the table has not been created yet.
DROP TABLE IF EXISTS edu_ods.`ods_customer_clue`;
CREATE TABLE edu_ods.`ods_customer_clue` (
    `id` bigint,
    `create_date_time` string COMMENT '创建时间',
    `update_date_time` string COMMENT '最后更新时间',
    `deleted` bigint COMMENT '是否被删除(禁用)',
    `customer_id` bigint COMMENT '客户id',
    `customer_relationship_id` bigint COMMENT '客户关系id',
    `session_id` string COMMENT '七陌会话id',
    `sid` string COMMENT '访客id',
    `status` string COMMENT '状态(undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转)',
    `user` string COMMENT '所属坐席',
    `create_time` string COMMENT '七陌创建时间',
    `platform` string COMMENT '平台来源 (pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询)',
    `s_name` string COMMENT '用户名称',
    `seo_source` string COMMENT '搜索来源',
    `seo_keywords` string COMMENT '关键字',
    `ip` string COMMENT 'IP地址',
    `referrer` string COMMENT '上级来源页面',
    `from_url` string COMMENT '会话来源页面',
    `landing_page_url` string COMMENT '访客着陆页面',
    `url_title` string COMMENT '咨询页面title',
    `to_peer` string COMMENT '所属技能组',
    `manual_time` string COMMENT '人工开始时间',
    `begin_time` string COMMENT '坐席领取时间 ',
    `reply_msg_count` bigint COMMENT '客服回复消息数',
    `total_msg_count` bigint COMMENT '消息总数',
    `msg_count` bigint COMMENT '客户发送消息数',
    `comment` string COMMENT '备注',
    `finish_reason` string COMMENT '结束类型',
    `finish_user` string COMMENT '结束坐席',
    `end_time` string COMMENT '会话结束时间',
    `platform_description` string COMMENT '客户平台信息',
    `browser_name` string COMMENT '浏览器名称',
    `os_info` string COMMENT '系统名称',
    `area` string COMMENT '区域',
    `country` string COMMENT '所在国家',
    `province` string COMMENT '省',
    `city` string COMMENT '城市',
    `creator` bigint COMMENT '创建人',
    `name` string COMMENT '客户姓名',
    `idcard` string COMMENT '身份证号',
    `phone` string COMMENT '手机号',
    `itcast_school_id` bigint COMMENT '校区Id',
    `itcast_school` string COMMENT '校区',
    `itcast_subject_id` bigint COMMENT '学科Id',
    `itcast_subject` string COMMENT '学科',
    `wechat` string COMMENT '微信',
    `qq` string COMMENT 'qq号',
    `email` string COMMENT '邮箱',
    `gender` string COMMENT '性别',
    `level` string COMMENT '客户级别',
    `origin_type` string COMMENT '数据来源渠道',
    `information_way` string COMMENT '资讯方式',
    `working_years` string COMMENT '开始工作时间',
    `technical_directions` string COMMENT '技术方向',
    `customer_state` string COMMENT '当前客户状态',
    `valid` bigint COMMENT '该线索是否是网资有效线索',
    `anticipat_signup_date` string COMMENT '预计报名时间',
    `clue_state` string COMMENT '线索状态',
    `scrm_department_id` bigint COMMENT 'SCRM内部部门id',
    `superior_url` string COMMENT '诸葛获取上级页面URL',
    `superior_source` string COMMENT '诸葛获取上级页面URL标题',
    `landing_url` string COMMENT '诸葛获取着陆页面URL',
    `landing_source` string COMMENT '诸葛获取着陆页面URL来源',
    `info_url` string COMMENT '诸葛获取留咨页URL',
    `info_source` string COMMENT '诸葛获取留咨页URL标题',
    `origin_channel` string COMMENT '投放渠道',
    `course_id` bigint,
    `course_name` string,
    `zhuge_session_id` string,
    `is_repeat` bigint COMMENT '是否重复线索(手机号维度) 0:正常 1:重复',
    `tenant` bigint COMMENT '租户id',
    `activity_id` string COMMENT '活动id',
    `activity_name` string COMMENT '活动名称',
    `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
    `shunt_mode_id` bigint COMMENT '匹配到的技能组id',
    `shunt_employee_group_id` bigint COMMENT '所属分流员工组'
) COMMENT '客户线索表'
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='ZLIB');
3,edu_ods层建立ods_employee
-- ODS copy of the employee table, partitioned by load date (dt).
-- Fix: the CREATE statement was not database-qualified, so the table was created in
-- whatever database the session was using instead of edu_ods (the one being dropped).
-- IF EXISTS keeps the script re-runnable when the table has not been created yet.
DROP TABLE IF EXISTS edu_ods.ods_employee;
CREATE TABLE edu_ods.ods_employee
(
    id                  bigint,
    email               string comment '公司邮箱,OA登录账号',
    real_name           string comment '员工的真实姓名',
    phone               string comment '手机号,目前还没有使用;隐私问题OA接口没有提供这个属性,',
    department_id       string comment 'OA中的部门编号,有负值',
    department_name     string comment 'OA中的部门名',
    remote_login        bigint comment '员工是否可以远程登录',
    job_number          string comment '员工工号',
    cross_school        bigint comment '是否有跨校区权限',
    last_login_date     string comment '最后登录日期',
    creator             bigint comment '创建人',
    create_date_time    string comment '创建时间',
    update_date_time    string comment '最后更新时间',
    deleted             bigint comment '是否被删除(禁用)',
    scrm_department_id  bigint comment 'SCRM内部部门id',
    leave_office        bigint comment '离职状态',
    leave_office_time   string comment '离职时间',
    reinstated_time     string comment '复职时间',
    superior_leaders_id bigint comment '上级领导ID',
    tdepart_id          bigint comment '直属部门',
    tenant              bigint,
    ems_user_name       string
) COMMENT '员工信息表'
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='ZLIB');
4,edu_ods层建立ods_scrm_department
-- ODS copy of the SCRM department table, partitioned by load date (dt).
-- IF EXISTS keeps the script re-runnable when the table has not been created yet.
DROP TABLE IF EXISTS edu_ods.`ods_scrm_department`;
CREATE TABLE edu_ods.`ods_scrm_department` (
    `id` bigint COMMENT '部门id',
    `name` string COMMENT '部门名称',
    `parent_id` bigint COMMENT '父部门id',
    `create_date_time` string COMMENT '创建时间',
    `update_date_time` string COMMENT '更新时间',
    `deleted` bigint COMMENT '删除标志',
    `id_path` string COMMENT '编码全路径',
    `tdepart_code` bigint COMMENT '直属部门',
    `creator` string COMMENT '创建者',
    `depart_level` bigint COMMENT '部门层级',
    `depart_sign` bigint COMMENT '部门标志,暂时默认1',
    `depart_line` bigint COMMENT '业务线,存储业务线编码',
    `depart_sort` bigint COMMENT '排序字段',
    `disable_flag` bigint COMMENT '禁用标志',
    `tenant` bigint
) COMMENT '部门表'
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='ZLIB');
5,edu_ods层建立ods_customer_appeal
-- ODS copy of the customer appeal table, partitioned by load date (dt).
-- IF EXISTS keeps the script re-runnable when the table has not been created yet.
DROP TABLE IF EXISTS edu_ods.`ods_customer_appeal`;
CREATE TABLE edu_ods.`ods_customer_appeal` (
    `id` bigint COMMENT '主键',
    `customer_relationship_first_id` bigint COMMENT '第一条客户关系id',
    `employee_id` bigint COMMENT '申诉人',
    `employee_name` string COMMENT '申诉人姓名',
    `employee_department_id` bigint COMMENT '申诉人部门',
    `employee_tdepart_id` bigint COMMENT '申诉人所属部门',
    `appeal_status` bigint COMMENT '申诉状态,0:待稽核 1:无效 2:有效',
    `audit_id` bigint COMMENT '稽核人id',
    `audit_name` string COMMENT '稽核人姓名',
    `audit_department_id` bigint COMMENT '稽核人所在部门',
    `audit_department_name` string COMMENT '稽核人部门名称',
    `audit_date_time` string COMMENT '稽核时间',
    `create_date_time` string COMMENT '创建时间(申诉时间)',
    `update_date_time` string COMMENT '更新时间',
    `deleted` bigint COMMENT '删除标志位',
    `tenant` bigint
) COMMENT '线索申诉表'
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='ZLIB');
6,edu_ods层建立ods_itcast_clazz
-- ODS copy of the EMS class (clazz) table; every source column is kept as a string
-- and the table is partitioned by load date (dt).
create table edu_ods.`ods_itcast_clazz`
(
    `id`                  string comment 'ems课程id(非自增)',
    `create_date_time`    string comment '创建时间',
    `update_date_time`    string comment '最后更新时间',
    `deleted`             string comment '是否被删除(禁用)',
    `itcast_school_id`    string comment 'ems校区ID',
    `itcast_school_name`  string comment 'ems校区名称',
    `itcast_subject_id`   string comment 'ems学科ID',
    `itcast_subject_name` string comment 'ems学科名称',
    `itcast_brand`        string comment 'ems品牌',
    `clazz_type_state`    string comment '班级类型状态',
    `clazz_type_name`     string comment '班级类型名称',
    `teaching_mode`       string comment '授课模式',
    `start_time`          string comment '开班时间',
    `end_time`            string comment '毕业时间',
    `comment`             string comment '备注',
    `detail`              string comment '详情(比如:27期)',
    `uncertain`           string comment '待定班(0:否,1:是)',
    `tenant`              string
)
    comment '报名课程表'
    partitioned by (dt string)
    row format delimited fields terminated by '\t'
    stored as orc
    tblproperties ('orc.compress' = 'ZLIB');
数据从mysql导入ods层语句
sql
将数据从mysql中导入到hive中的edu_ods 数据库下的customer
# Import one day of rows from the month-sharded MySQL table
# edu_scrm.customer_${tableMonth} into Hive table edu_ods.ods_customer.
# Expects dateStr (YYYY-MM-DD) and tableMonth (YYYY_MM) to be set by the caller.
# Fixes:
#   - the WHERE clause referenced an alias "wce" that the query never defines;
#   - the dt literal was unquoted, so MySQL would evaluate e.g. 2023-12-07 as
#     integer subtraction instead of a date string.
# NOTE(review): assumes the source table has a create_time column - confirm.
# NOTE(review): --password exposes the secret on the command line; consider --password-file.
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query "SELECT *, '${dateStr}' AS dt FROM edu_scrm.customer_${tableMonth} WHERE create_time BETWEEN '${dateStr} 00:00:00' AND '${dateStr} 23:59:59' AND \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer \
-m 1
将数据从mysql中导入到hive中的edu_ods 数据库下的customer_appeal
# One-off load of MySQL table customer_appeal into Hive table
# edu_ods.ods_customer_appeal; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from customer_appeal where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_customer_appeal \
    --num-mappers 1
将数据从mysql中导入到hive中的edu_ods 数据库下的customer_clue
# One-off load of MySQL table customer_clue into Hive table
# edu_ods.ods_customer_clue; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from customer_clue where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_customer_clue \
    --num-mappers 1
将数据从mysql中导入到hive中的edu_ods 数据库下的customer_relationship
# One-off load of MySQL table customer_relationship into Hive table
# edu_ods.ods_customer_relationship; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from customer_relationship where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_customer_relationship \
    --num-mappers 1
将数据从mysql中导入到hive中的edu_ods 数据库下的employee
# One-off load of MySQL table employee into Hive table
# edu_ods.ods_employee; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from employee where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_employee \
    --num-mappers 1
将数据从mysql中导入到hive中的edu_ods 数据库下的itcast_clazz
# One-off load of MySQL table itcast_clazz into Hive table
# edu_ods.ods_itcast_clazz; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from itcast_clazz where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_itcast_clazz \
    --num-mappers 1
将数据从mysql中导入到hive中的edu_ods 数据库下的itcast_school
# One-off load of MySQL table itcast_school into Hive table
# edu_ods.ods_itcast_school; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from itcast_school where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_itcast_school \
    --num-mappers 1
将数据从mysql中导入到hive中的edu_ods 数据库下的itcast_subject
# One-off load of MySQL table itcast_subject into Hive table
# edu_ods.ods_itcast_subject; every row gets the fixed value dt = "2023-12-07".
# Fix: the table-name line ended in "ods_itcast_subject\" with no space before the
# line continuation, so the shell joined it with the next line and sqoop received
# "ods_itcast_subject--m" as the table name.
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_subject where 1=1 and $CONDITIONS' \
--hcatalog-database edu_ods \
--hcatalog-table ods_itcast_subject \
--m 1;
将数据从mysql中导入到hive中的edu_ods 数据库下的scrm_department
# One-off load of MySQL table scrm_department into Hive table
# edu_ods.ods_scrm_department; every row gets the fixed value dt = "2023-12-07".
/usr/bin/sqoop import \
    --connect jdbc:mysql://192.168.88.80:3306/edu_scrm \
    --username root \
    --password 123456 \
    --query 'select *, "2023-12-07" as dt from scrm_department where 1=1 and $CONDITIONS' \
    --hcatalog-database edu_ods \
    --hcatalog-table ods_scrm_department \
    --num-mappers 1
将数据从mysql导入hive的ods层的全量脚本
sql
# Shared settings for the sqoop import commands that follow.
export SQOOP_HOME=/usr/bin/sqoop

# Yesterday's date as YYYY-MM-DD; intended for incremental imports.
dateStr=$(date -d '-1 day' +'%Y-%m-%d')
# Year and month of yesterday as YYYY_MM; intended for month-suffixed source table names.
tableMonth=$(date -d '-1 day' +'%Y_%m')

# MySQL connection settings.
jdbcUrl='jdbc:mysql://106.75.33.59:3306/edu_scrm?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true'
username='itcast_edu_stu'
password='itcast_edu_stu'
### Import MySQL table customer_appeal into Hive table edu_ods.ods_customer_appeal.
# The script's dateStr value is written as the dt partition value of every row.
# Fixes:
#   - the stray "and dt=dateStr" after the closing quote was handed to sqoop as
#     extra arguments; the query is now one double-quoted string, with
#     $CONDITIONS escaped so the shell leaves it for sqoop to substitute;
#   - the hard-coded date is replaced by the dateStr the script computes;
#   - expansions are quoted because jdbcUrl contains "?" and "&".
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "${jdbcUrl}" \
--username "${username}" \
--password "${password}" \
--query "select *, '${dateStr}' as dt from customer_appeal where 1=1 and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_appeal \
--num-mappers 1
### Import MySQL table customer_clue into Hive table edu_ods.ods_customer_clue.
# The script's dateStr value is written as the dt partition value of every row
# (previously a hard-coded "2023-12-07", which left dateStr unused).
# $CONDITIONS is escaped so the shell leaves it for sqoop to substitute.
# Expansions are quoted because jdbcUrl contains "?" and "&".
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "${jdbcUrl}" \
--username "${username}" \
--password "${password}" \
--query "select *, '${dateStr}' as dt from customer_clue where 1=1 and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_clue \
--num-mappers 1
### Import MySQL table customer_relationship into Hive table edu_ods.ods_customer_relationship.
# The script's dateStr value is written as the dt partition value of every row
# (previously a hard-coded "2023-12-07", which left dateStr unused).
# $CONDITIONS is escaped so the shell leaves it for sqoop to substitute.
# Expansions are quoted because jdbcUrl contains "?" and "&".
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "${jdbcUrl}" \
--username "${username}" \
--password "${password}" \
--query "select *, '${dateStr}' as dt from customer_relationship where 1=1 and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer_relationship \
--num-mappers 1
### Import MySQL table employee into Hive table edu_ods.ods_employee.
# The script's dateStr value is written as the dt partition value of every row
# (previously a hard-coded "2023-12-07", which left dateStr unused).
# $CONDITIONS is escaped so the shell leaves it for sqoop to substitute.
# Expansions are quoted because jdbcUrl contains "?" and "&".
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "${jdbcUrl}" \
--username "${username}" \
--password "${password}" \
--query "select *, '${dateStr}' as dt from employee where 1=1 and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_employee \
--num-mappers 1
### Import MySQL table itcast_clazz into Hive table edu_ods.ods_itcast_clazz.
# The script's dateStr value is written as the dt partition value of every row
# (previously a hard-coded "2023-12-07", which left dateStr unused).
# $CONDITIONS is escaped so the shell leaves it for sqoop to substitute.
# Expansions are quoted because jdbcUrl contains "?" and "&".
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "${jdbcUrl}" \
--username "${username}" \
--password "${password}" \
--query "select *, '${dateStr}' as dt from itcast_clazz where 1=1 and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_itcast_clazz \
--num-mappers 1
### Import MySQL table scrm_department into Hive table edu_ods.ods_scrm_department.
# The script's dateStr value is written as the dt partition value of every row
# (previously a hard-coded "2023-12-07", which left dateStr unused).
# $CONDITIONS is escaped so the shell leaves it for sqoop to substitute.
# Expansions are quoted because jdbcUrl contains "?" and "&".
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect "${jdbcUrl}" \
--username "${username}" \
--password "${password}" \
--query "select *, '${dateStr}' as dt from scrm_department where 1=1 and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_scrm_department \
--num-mappers 1
dwd层维度表事实表拉链表建表语句
sql
1,创建DWD层的表dim_customer_appeal
-- DWD zipper (slowly changing) dimension for customer appeals.
-- Each row carries start_date / end_date validity columns; partitioned by dt and
-- bucketed on customer_relationship_first_id.
-- Fixes: the CREATE statement had no terminating semicolon, so it ran into the text
-- that follows it; DROP now uses IF EXISTS so the script is re-runnable.
-- NOTE(review): the id columns are int here but bigint in edu_ods.ods_customer_appeal -
-- confirm the narrowing is intended.
DROP TABLE IF EXISTS edu_dwd.dim_customer_appeal;
CREATE TABLE edu_dwd.`dim_customer_appeal` (
    `id` int,
    `customer_relationship_first_id` int COMMENT '第一条客户关系id',
    `employee_id` int COMMENT '申诉人',
    `employee_name` string COMMENT '申诉人姓名',
    `employee_department_id` int COMMENT '申诉人部门',
    `employee_tdepart_id` int COMMENT '申诉人所属部门',
    `appeal_status` int COMMENT '申诉状态,0:待稽核 1:无效 2:有效',
    `audit_id` int COMMENT '稽核人id',
    `audit_name` string COMMENT '稽核人姓名',
    `audit_department_id` int COMMENT '稽核人所在部门',
    `audit_department_name` string COMMENT '稽核人部门名称',
    `audit_date_time` string COMMENT '稽核时间',
    `create_date_time` string COMMENT '创建时间(申诉时间)',
    `update_date_time` string COMMENT '更新时间',
    `deleted` int COMMENT '删除标志位',
    `tenant` int,
    `end_date` string COMMENT '失效日期',
    `start_date` string COMMENT '开始日期'
) COMMENT '线索申诉维度拉链表'
PARTITIONED BY (dt string)
CLUSTERED BY (customer_relationship_first_id) INTO 6 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='SNAPPY');
relationship,clue,appeal,itcast_clazz,employee,department
2,创建DWD层的表dim_itcast_clazz
-- DWD zipper dimension for EMS classes.
-- Carries start_date / end_date validity columns plus the ODS load date (dt) as a
-- regular column; partitioned by year/month/day and bucketed on id.
-- IF EXISTS keeps the script re-runnable when the table has not been created yet.
DROP TABLE IF EXISTS edu_dwd.dim_itcast_clazz;
CREATE TABLE edu_dwd.`dim_itcast_clazz` (
    `id` int COMMENT 'ems课程id(非自增)',
    `create_date_time` string COMMENT '创建时间',
    `update_date_time` string COMMENT '最后更新时间',
    `deleted` int COMMENT '是否被删除(禁用)',
    `itcast_school_id` string COMMENT 'ems校区ID',
    `itcast_school_name` string COMMENT 'ems校区名称',
    `itcast_subject_id` string COMMENT 'ems学科ID',
    `itcast_subject_name` string COMMENT 'ems学科名称',
    `itcast_brand` string COMMENT 'ems品牌',
    `clazz_type_state` string COMMENT '班级类型状态',
    `clazz_type_name` string COMMENT '班级类型名称',
    `teaching_mode` string COMMENT '授课模式',
    `start_time` string COMMENT '开班时间',
    `end_time` string COMMENT '毕业时间',
    `comment` string COMMENT '备注',
    `detail` string COMMENT '详情(比如:27期)',
    `uncertain` int COMMENT '待定班(0:否,1:是)',
    `tenant` int,
    `end_date` string COMMENT '失效日期',
    `start_date` string COMMENT '开始日期',
    `dt` string
) COMMENT '报名课程表'
PARTITIONED BY (year string, month string, day string)
CLUSTERED BY (id) INTO 3 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='SNAPPY');
3,创建DWD层的表dim_scrm_department
-- DWD dimension for SCRM departments.
-- Keeps the ODS load date (dt) as a regular column; partitioned by year/month/day
-- and bucketed on id.
-- Fix: the first partition column was misspelled "yeat"; the loader for this table
-- writes PARTITION (year, month, day), so the names must match.
-- If a table with the misspelled column was already created, drop it before re-running.
CREATE TABLE edu_dwd.`dim_scrm_department` (
    `id` bigint COMMENT '部门id',
    `name` string COMMENT '部门名称',
    `parent_id` bigint COMMENT '父部门id',
    `create_date_time` string COMMENT '创建时间',
    `update_date_time` string COMMENT '更新时间',
    `deleted` bigint COMMENT '删除标志',
    `id_path` string COMMENT '编码全路径',
    `tdepart_code` bigint COMMENT '直属部门',
    `creator` string COMMENT '创建者',
    `depart_level` bigint COMMENT '部门层级',
    `depart_sign` bigint COMMENT '部门标志,暂时默认1',
    `depart_line` bigint COMMENT '业务线,存储业务线编码',
    `depart_sort` bigint COMMENT '排序字段',
    `disable_flag` bigint COMMENT '禁用标志',
    `tenant` bigint,
    `dt` string
) COMMENT '部门表'
PARTITIONED BY (year string, month string, day string)
CLUSTERED BY (id) INTO 3 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='SNAPPY');
4,创建了DWD层的表edu_dwd.dim_employee
-- DWD dimension for employees: same columns as the ODS employee table,
-- partitioned by dt and bucketed on id.
CREATE TABLE edu_dwd.dim_employee (
    `id`                  BIGINT,
    `email`               STRING COMMENT '公司邮箱,OA登录账号',
    `real_name`           STRING COMMENT '员工的真实姓名',
    `phone`               STRING COMMENT '手机号,目前还没有使用;隐私问题OA接口没有提供这个属性,',
    `department_id`       STRING COMMENT 'OA中的部门编号,有负值',
    `department_name`     STRING COMMENT 'OA中的部门名',
    `remote_login`        BIGINT COMMENT '员工是否可以远程登录',
    `job_number`          STRING COMMENT '员工工号',
    `cross_school`        BIGINT COMMENT '是否有跨校区权限',
    `last_login_date`     STRING COMMENT '最后登录日期',
    `creator`             BIGINT COMMENT '创建人',
    `create_date_time`    STRING COMMENT '创建时间',
    `update_date_time`    STRING COMMENT '最后更新时间',
    `deleted`             BIGINT COMMENT '是否被删除(禁用)',
    `scrm_department_id`  BIGINT COMMENT 'SCRM内部部门id',
    `leave_office`        BIGINT COMMENT '离职状态',
    `leave_office_time`   STRING COMMENT '离职时间',
    `reinstated_time`     STRING COMMENT '复职时间',
    `superior_leaders_id` BIGINT COMMENT '上级领导ID',
    `tdepart_id`          BIGINT COMMENT '直属部门',
    `tenant`              BIGINT,
    `ems_user_name`       STRING
)
COMMENT '员工信息表'
PARTITIONED BY (dt STRING)
CLUSTERED BY (id) INTO 3 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'SNAPPY');
5,创建DWD层的表ods_customer_clue
-- DWD zipper fact table for customer clues (leads).
-- Carries start_date / end_date validity columns plus the ODS load date (dt) as a
-- regular column; partitioned by year/month/day and bucketed on customer_relationship_id.
-- Fixes:
--   - a comma was missing between the start_date and dt column definitions, so the
--     statement could not be parsed;
--   - the table was named edu_dwd.ods_customer_clue, but the ODS-to-DWD loader writes
--     to edu_dwd.fact_customer_clue; the DDL now creates the table the loader targets.
DROP TABLE IF EXISTS edu_dwd.fact_customer_clue;
CREATE TABLE edu_dwd.fact_customer_clue (
    `id` bigint,
    `create_date_time` string COMMENT '创建时间',
    `update_date_time` string COMMENT '最后更新时间',
    `deleted` bigint COMMENT '是否被删除(禁用)',
    `customer_id` bigint COMMENT '客户id',
    `customer_relationship_id` bigint COMMENT '客户关系id',
    `session_id` string COMMENT '七陌会话id',
    `sid` string COMMENT '访客id',
    `status` string COMMENT '状态(undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转)',
    `user` string COMMENT '所属坐席',
    `create_time` string COMMENT '七陌创建时间',
    `platform` string COMMENT '平台来源 (pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询)',
    `s_name` string COMMENT '用户名称',
    `seo_source` string COMMENT '搜索来源',
    `seo_keywords` string COMMENT '关键字',
    `ip` string COMMENT 'IP地址',
    `referrer` string COMMENT '上级来源页面',
    `from_url` string COMMENT '会话来源页面',
    `landing_page_url` string COMMENT '访客着陆页面',
    `url_title` string COMMENT '咨询页面title',
    `to_peer` string COMMENT '所属技能组',
    `manual_time` string COMMENT '人工开始时间',
    `begin_time` string COMMENT '坐席领取时间 ',
    `reply_msg_count` bigint COMMENT '客服回复消息数',
    `total_msg_count` bigint COMMENT '消息总数',
    `msg_count` bigint COMMENT '客户发送消息数',
    `comment` string COMMENT '备注',
    `finish_reason` string COMMENT '结束类型',
    `finish_user` string COMMENT '结束坐席',
    `end_time` string COMMENT '会话结束时间',
    `platform_description` string COMMENT '客户平台信息',
    `browser_name` string COMMENT '浏览器名称',
    `os_info` string COMMENT '系统名称',
    `area` string COMMENT '区域',
    `country` string COMMENT '所在国家',
    `province` string COMMENT '省',
    `city` string COMMENT '城市',
    `creator` bigint COMMENT '创建人',
    `name` string COMMENT '客户姓名',
    `idcard` string COMMENT '身份证号',
    `phone` string COMMENT '手机号',
    `itcast_school_id` bigint COMMENT '校区Id',
    `itcast_school` string COMMENT '校区',
    `itcast_subject_id` bigint COMMENT '学科Id',
    `itcast_subject` string COMMENT '学科',
    `wechat` string COMMENT '微信',
    `qq` string COMMENT 'qq号',
    `email` string COMMENT '邮箱',
    `gender` string COMMENT '性别',
    `level` string COMMENT '客户级别',
    `origin_type` string COMMENT '数据来源渠道',
    `information_way` string COMMENT '资讯方式',
    `working_years` string COMMENT '开始工作时间',
    `technical_directions` string COMMENT '技术方向',
    `customer_state` string COMMENT '当前客户状态',
    `valid` bigint COMMENT '该线索是否是网资有效线索',
    `anticipat_signup_date` string COMMENT '预计报名时间',
    `clue_state` string COMMENT '线索状态',
    `scrm_department_id` bigint COMMENT 'SCRM内部部门id',
    `superior_url` string COMMENT '诸葛获取上级页面URL',
    `superior_source` string COMMENT '诸葛获取上级页面URL标题',
    `landing_url` string COMMENT '诸葛获取着陆页面URL',
    `landing_source` string COMMENT '诸葛获取着陆页面URL来源',
    `info_url` string COMMENT '诸葛获取留咨页URL',
    `info_source` string COMMENT '诸葛获取留咨页URL标题',
    `origin_channel` string COMMENT '投放渠道',
    `course_id` bigint,
    `course_name` string,
    `zhuge_session_id` string,
    `is_repeat` bigint COMMENT '是否重复线索(手机号维度) 0:正常 1:重复',
    `tenant` bigint COMMENT '租户id',
    `activity_id` string COMMENT '活动id',
    `activity_name` string COMMENT '活动名称',
    `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
    `shunt_mode_id` bigint COMMENT '匹配到的技能组id',
    `shunt_employee_group_id` bigint COMMENT '所属分流员工组',
    `end_date` string COMMENT '失效日期',
    `start_date` string COMMENT '开始日期',
    `dt` string
) COMMENT '客户线索表'
PARTITIONED BY (year string, month string, day string)
CLUSTERED BY (customer_relationship_id) INTO 6 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='SNAPPY');
6,创建DWD层的表fact_customer_relationship
-- DWD zipper fact table for customer relationships (intentions).
-- Partitioned by year/month/day and bucketed on id.
-- Fixes:
--   - the DROP statement targeted edu_ods.ods_customer_relationship, i.e. it deleted
--     the ODS source table instead of this DWD table;
--   - the ODS-to-DWD loader supplies end_date and start_date after
--     transfer_bxg_belonger_name, but the table did not declare them, so the column
--     counts did not match; both validity columns are now declared in that order.
DROP TABLE IF EXISTS edu_dwd.fact_customer_relationship;
CREATE TABLE edu_dwd.fact_customer_relationship (
    `id` bigint,
    `create_date_time` string,
    `update_date_time` string COMMENT '最后更新时间',
    `deleted` bigint COMMENT '是否被删除(禁用)',
    `customer_id` bigint COMMENT '所属客户id',
    `first_id` bigint COMMENT '第一条客户关系id',
    `belonger` bigint COMMENT '归属人',
    `belonger_name` string COMMENT '归属人姓名',
    `initial_belonger` bigint COMMENT '初始归属人',
    `distribution_handler` bigint COMMENT '分配处理人',
    `business_scrm_department_id` bigint COMMENT '归属部门',
    `last_visit_time` string COMMENT '最后回访时间',
    `next_visit_time` string COMMENT '下次回访时间',
    `origin_type` string COMMENT '数据来源',
    `itcast_school_id` bigint COMMENT '校区Id',
    `itcast_subject_id` bigint COMMENT '学科Id',
    `intention_study_type` string COMMENT '意向学习方式',
    `anticipat_signup_date` string COMMENT '预计报名时间',
    `level` string COMMENT '客户级别',
    `creator` bigint COMMENT '创建人',
    `current_creator` bigint COMMENT '当前创建人:初始==创建人,当在公海拉回时为 拉回人',
    `creator_name` string COMMENT '创建者姓名',
    `origin_channel` string COMMENT '来源渠道',
    `comment` string COMMENT '备注',
    `first_customer_clue_id` bigint COMMENT '第一条线索id',
    `last_customer_clue_id` bigint COMMENT '最后一条线索id',
    `process_state` string COMMENT '处理状态',
    `process_time` string COMMENT '处理状态变动时间',
    `payment_state` string COMMENT '支付状态',
    `payment_time` string COMMENT '支付状态变动时间',
    `signup_state` string COMMENT '报名状态',
    `signup_time` string COMMENT '报名时间',
    `notice_state` string COMMENT '通知状态',
    `notice_time` string COMMENT '通知状态变动时间',
    `lock_state` bigint COMMENT '锁定状态',
    `lock_time` string COMMENT '锁定状态修改时间',
    `itcast_clazz_id` bigint COMMENT '所属ems班级id',
    `itcast_clazz_time` string COMMENT '报班时间',
    `payment_url` string COMMENT '付款链接',
    `payment_url_time` string COMMENT '支付链接生成时间',
    `ems_student_id` bigint COMMENT 'ems的学生id',
    `delete_reason` string COMMENT '删除原因',
    `deleter` bigint COMMENT '删除人',
    `deleter_name` string COMMENT '删除人姓名',
    `delete_time` string COMMENT '删除时间',
    `course_id` bigint COMMENT '课程ID',
    `course_name` string COMMENT '课程名称',
    `delete_comment` string COMMENT '删除原因说明',
    `close_state` string COMMENT '关闭装填',
    `close_time` string COMMENT '关闭状态变动时间',
    `appeal_id` bigint COMMENT '申诉id',
    `tenant` bigint COMMENT '租户',
    `total_fee` decimal(19,0) COMMENT '报名费总金额',
    `belonged` bigint COMMENT '小周期归属人',
    `belonged_time` string COMMENT '归属时间',
    `belonger_time` string COMMENT '归属时间',
    `transfer` bigint COMMENT '转移人',
    `transfer_time` string COMMENT '转移时间',
    `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
    `transfer_bxg_oa_account` string COMMENT '转移到博学谷归属人OA账号',
    `transfer_bxg_belonger_name` string COMMENT '转移到博学谷归属人OA姓名',
    `end_date` string COMMENT '失效日期',
    `start_date` string COMMENT '开始日期'
) COMMENT '客户意向表'
PARTITIONED BY (year string, month string, day string)
CLUSTERED BY (id) INTO 3 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress'='SNAPPY');
数据从ods层导入dwd层语句
sql
1,将ods层的数据插入dwd层的表dim_customer_appeal(拉链表)
-- Load the appeal zipper dimension from ODS.
-- Every row is written as an open version: end_date is the sentinel '9999-99-99'
-- and start_date is the row's update_date_time. The trailing dt column feeds the
-- dynamic partition.
-- Fixes: the statement had no terminating semicolon; all partition columns are
-- dynamic, which Hive rejects in strict mode, so nonstrict dynamic partitioning is
-- enabled for the session first.
-- NOTE(review): '9999-99-99' is not a valid calendar date - confirm that downstream
-- consumers only compare it as a string.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE edu_dwd.dim_customer_appeal PARTITION (dt)
SELECT
    `id`,
    `customer_relationship_first_id`,
    `employee_id`,
    `employee_name`,
    `employee_department_id`,
    `employee_tdepart_id`,
    `appeal_status`,
    `audit_id`,
    `audit_name`,
    `audit_department_id`,
    `audit_department_name`,
    `audit_date_time`,
    `create_date_time`,
    `update_date_time`,
    `deleted`,
    `tenant`,
    '9999-99-99' AS end_date,
    update_date_time AS start_date,
    dt
FROM edu_ods.ods_customer_appeal;
2,将ods层的数据插入dwd层的表edu_dwd.`dim_itcast_clazz`(拉链表)
-- Load the class zipper dimension from ODS.
-- Every row is written as an open version (end_date '9999-99-99', start_date =
-- update_date_time); the last three expressions feed the year/month/day dynamic
-- partitions, derived from create_date_time.
-- Fixes: "dt string" in the select list (a column definition pasted from the DDL) is
-- now plain "dt"; the statement is terminated; nonstrict dynamic partitioning is
-- enabled because every partition column is dynamic.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE edu_dwd.`dim_itcast_clazz` PARTITION (year, month, day)
SELECT
    id
    ,create_date_time
    ,update_date_time
    ,deleted
    ,itcast_school_id
    ,itcast_school_name
    ,itcast_subject_id
    ,itcast_subject_name
    ,itcast_brand
    ,clazz_type_state
    ,clazz_type_name
    ,teaching_mode
    ,start_time
    ,end_time
    ,comment
    ,detail
    ,uncertain
    ,tenant
    ,'9999-99-99' AS end_date
    ,update_date_time AS start_date
    ,dt
    ,year(create_date_time)
    ,month(create_date_time)
    ,day(create_date_time)
FROM edu_ods.ods_itcast_clazz;
3,将ods层的数据插入dwd层的表dim_scrm_department
-- Load the department dimension from ODS.
-- The ODS load date (dt) is kept as a regular column; year/month/day dynamic
-- partitions are derived from create_date_time.
-- Fixes: the statement is now terminated, and nonstrict dynamic partitioning is
-- enabled because every partition column is dynamic.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE edu_dwd.dim_scrm_department PARTITION (year, month, day)
SELECT
    id
    ,name
    ,parent_id
    ,create_date_time
    ,update_date_time
    ,deleted
    ,id_path
    ,tdepart_code
    ,creator
    ,depart_level
    ,depart_sign
    ,depart_line
    ,depart_sort
    ,disable_flag
    ,tenant
    ,dt
    ,year(create_date_time) AS year
    ,month(create_date_time) AS month
    ,day(create_date_time) AS day
FROM edu_ods.ods_scrm_department;
4,将ods层的数据插入dwd层的表edu_dwd.dim_employee
-- Load the employee dimension from ODS; the trailing dt column feeds the dynamic partition.
-- Fixes:
--   - the select list started with a comma ("select ,id"), a syntax error;
--   - the source database was misspelled "deu_ods" instead of "edu_ods";
--   - the statement is now terminated, and nonstrict dynamic partitioning is enabled
--     because the only partition column is dynamic.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE edu_dwd.dim_employee PARTITION (dt)
SELECT
    id
    ,email
    ,real_name
    ,phone
    ,department_id
    ,department_name
    ,remote_login
    ,job_number
    ,cross_school
    ,last_login_date
    ,creator
    ,create_date_time
    ,update_date_time
    ,deleted
    ,scrm_department_id
    ,leave_office
    ,leave_office_time
    ,reinstated_time
    ,superior_leaders_id
    ,tdepart_id
    ,tenant
    ,ems_user_name
    ,dt
FROM edu_ods.ods_employee;
5,将ods层的数据插入dwd层的表clue(拉链表)
-- Load the clue zipper fact table from ODS.
-- Every row is written as an open version (end_date '9999-99-99', start_date =
-- update_date_time); dt is kept as a regular column and the year/month/day dynamic
-- partitions are derived from create_date_time.
-- Fixes: the statement is now terminated, and nonstrict dynamic partitioning is
-- enabled because every partition column is dynamic.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE edu_dwd.fact_customer_clue PARTITION (year, month, day)
SELECT
    id
    ,create_date_time
    ,update_date_time
    ,deleted
    ,customer_id
    ,customer_relationship_id
    ,session_id
    ,sid
    ,status
    ,`user`
    ,create_time
    ,platform
    ,s_name
    ,seo_source
    ,seo_keywords
    ,ip
    ,referrer
    ,from_url
    ,landing_page_url
    ,url_title
    ,to_peer
    ,manual_time
    ,begin_time
    ,reply_msg_count
    ,total_msg_count
    ,msg_count
    ,comment
    ,finish_reason
    ,finish_user
    ,end_time
    ,platform_description
    ,browser_name
    ,os_info
    ,area
    ,country
    ,province
    ,city
    ,creator
    ,name
    ,idcard
    ,phone
    ,itcast_school_id
    ,itcast_school
    ,itcast_subject_id
    ,itcast_subject
    ,wechat
    ,qq
    ,email
    ,gender
    ,`level`
    ,origin_type
    ,information_way
    ,working_years
    ,technical_directions
    ,customer_state
    ,valid
    ,anticipat_signup_date
    ,clue_state
    ,scrm_department_id
    ,superior_url
    ,superior_source
    ,landing_url
    ,landing_source
    ,info_url
    ,info_source
    ,origin_channel
    ,course_id
    ,course_name
    ,zhuge_session_id
    ,is_repeat
    ,tenant
    ,activity_id
    ,activity_name
    ,follow_type
    ,shunt_mode_id
    ,shunt_employee_group_id
    ,'9999-99-99' AS end_date
    ,update_date_time AS start_date
    ,dt
    ,year(create_date_time) AS year
    ,month(create_date_time) AS month
    ,day(create_date_time) AS day
FROM edu_ods.ods_customer_clue;
6,将ods层的数据插入dwd层的表relationship(拉链表)
-- Load the customer relationship zipper fact table from ODS.
-- Every row is written as an open version (end_date '9999-99-99', start_date =
-- update_date_time); the year/month/day dynamic partitions are derived from
-- create_date_time.
-- Fixes: the statement is now terminated, and nonstrict dynamic partitioning is
-- enabled because every partition column is dynamic.
-- NOTE(review): the select list supplies end_date and start_date, so the target table
-- must declare both columns after transfer_bxg_belonger_name - verify against the DDL.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE edu_dwd.fact_customer_relationship PARTITION (year, month, day)
SELECT
    id
    ,create_date_time
    ,update_date_time
    ,deleted
    ,customer_id
    ,first_id
    ,belonger
    ,belonger_name
    ,initial_belonger
    ,distribution_handler
    ,business_scrm_department_id
    ,last_visit_time
    ,next_visit_time
    ,origin_type
    ,itcast_school_id
    ,itcast_subject_id
    ,intention_study_type
    ,anticipat_signup_date
    ,level
    ,creator
    ,current_creator
    ,creator_name
    ,origin_channel
    ,comment
    ,first_customer_clue_id
    ,last_customer_clue_id
    ,process_state
    ,process_time
    ,payment_state
    ,payment_time
    ,signup_state
    ,signup_time
    ,notice_state
    ,notice_time
    ,lock_state
    ,lock_time
    ,itcast_clazz_id
    ,itcast_clazz_time
    ,payment_url
    ,payment_url_time
    ,ems_student_id
    ,delete_reason
    ,deleter
    ,deleter_name
    ,delete_time
    ,course_id
    ,course_name
    ,delete_comment
    ,close_state
    ,close_time
    ,appeal_id
    ,tenant
    ,total_fee
    ,belonged
    ,belonged_time
    ,belonger_time
    ,transfer
    ,transfer_time
    ,follow_type
    ,transfer_bxg_oa_account
    ,transfer_bxg_belonger_name
    ,'9999-99-99' AS end_date
    ,update_date_time AS start_date
    ,year(create_date_time) AS year
    ,month(create_date_time) AS month
    ,day(create_date_time) AS day
FROM edu_ods.ods_customer_relationship;