分析项目需求后的ods层和dwd层建表和插入语句(全量脚本)

ods层

主要在ods干的事,就是根据需求将所需要的表的数据从mysql中导入到ods层中,所以需要在hive中建立ods层的表,并且在导入的时候需要根据需求过滤一下空数据,并在建表时加入一个dt字段记录导入的时间

ods层建表语句

sql 复制代码
1,edu_ods层建立ods_customer_relationship
drop table edu_ods.ods_customer_relationship;
CREATE TABLE edu_ods.ods_customer_relationship (
  `id` bigint ,
  `create_date_time` string ,
  `update_date_time` string COMMENT '最后更新时间',
  `deleted` bigint COMMENT '是否被删除(禁用)',
  `customer_id` bigint COMMENT '所属客户id',
  `first_id` bigint COMMENT '第一条客户关系id',
  `belonger` bigint COMMENT '归属人',
  `belonger_name` string COMMENT '归属人姓名',
  `initial_belonger` bigint COMMENT '初始归属人',
  `distribution_handler` bigint COMMENT '分配处理人',
  `business_scrm_department_id` bigint COMMENT '归属部门',
  `last_visit_time` string COMMENT '最后回访时间',
  `next_visit_time` string COMMENT '下次回访时间',
  `origin_type` string COMMENT '数据来源',
  `itcast_school_id` bigint COMMENT '校区Id',
  `itcast_subject_id` bigint COMMENT '学科Id',
  `intention_study_type` string COMMENT '意向学习方式',
  `anticipat_signup_date` string COMMENT '预计报名时间',
  `level` string COMMENT '客户级别',
  `creator` bigint COMMENT '创建人',
  `current_creator` bigint COMMENT '当前创建人:初始==创建人,当在公海拉回时为 拉回人',
  `creator_name` string COMMENT '创建者姓名',
  `origin_channel` string COMMENT '来源渠道',
  `comment` string COMMENT '备注',
  `first_customer_clue_id` bigint COMMENT '第一条线索id',
  `last_customer_clue_id` bigint COMMENT '最后一条线索id',
  `process_state` string COMMENT '处理状态',
  `process_time` string COMMENT '处理状态变动时间',
  `payment_state` string COMMENT '支付状态',
  `payment_time` string COMMENT '支付状态变动时间',
  `signup_state` string COMMENT '报名状态',
  `signup_time` string COMMENT '报名时间',
  `notice_state` string COMMENT '通知状态',
  `notice_time` string COMMENT '通知状态变动时间',
  `lock_state` bigint COMMENT '锁定状态',
  `lock_time` string COMMENT '锁定状态修改时间',
  `itcast_clazz_id` bigint COMMENT '所属ems班级id',
  `itcast_clazz_time` string COMMENT '报班时间',
  `payment_url` string COMMENT '付款链接',
  `payment_url_time` string COMMENT '支付链接生成时间',
  `ems_student_id` bigint COMMENT 'ems的学生id',
  `delete_reason` string COMMENT '删除原因',
  `deleter` bigint COMMENT '删除人',
  `deleter_name` string COMMENT '删除人姓名',
  `delete_time` string COMMENT '删除时间',
  `course_id` bigint COMMENT '课程ID',
  `course_name` string COMMENT '课程名称',
  `delete_comment` string COMMENT '删除原因说明',
  `close_state` string COMMENT '关闭装填',
  `close_time` string COMMENT '关闭状态变动时间',
  `appeal_id` bigint COMMENT '申诉id',
  `tenant` bigint COMMENT '租户',
  `total_fee` decimal(19,0) COMMENT '报名费总金额',
  `belonged` bigint COMMENT '小周期归属人',
  `belonged_time` string COMMENT '归属时间',
  `belonger_time` string COMMENT '归属时间',
  `transfer` bigint COMMENT '转移人',
  `transfer_time` string COMMENT '转移时间',
  `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
  `transfer_bxg_oa_account` string COMMENT '转移到博学谷归属人OA账号',
  `transfer_bxg_belonger_name` string COMMENT '转移到博学谷归属人OA姓名'
) COMMENT '客户意向表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');




2,edu_ods层建立ods_customer_clue
drop table edu_ods.`ods_customer_clue`;
CREATE TABLE edu_ods.`ods_customer_clue` (
  `id` bigint ,
  `create_date_time` string COMMENT '创建时间',
  `update_date_time` string COMMENT '最后更新时间',
  `deleted` bigint COMMENT '是否被删除(禁用)',
  `customer_id` bigint COMMENT '客户id',
  `customer_relationship_id` bigint COMMENT '客户关系id',
  `session_id` string COMMENT '七陌会话id',
  `sid` string COMMENT '访客id',
  `status` string COMMENT '状态(undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转)',
  `user` string COMMENT '所属坐席',
  `create_time` string COMMENT '七陌创建时间',
  `platform` string COMMENT '平台来源 (pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询)',
  `s_name` string COMMENT '用户名称',
  `seo_source` string COMMENT '搜索来源',
  `seo_keywords` string COMMENT '关键字',
  `ip` string COMMENT 'IP地址',
  `referrer` string COMMENT '上级来源页面',
  `from_url` string COMMENT '会话来源页面',
  `landing_page_url` string COMMENT '访客着陆页面',
  `url_title` string COMMENT '咨询页面title',
  `to_peer` string COMMENT '所属技能组',
  `manual_time` string COMMENT '人工开始时间',
  `begin_time` string COMMENT '坐席领取时间 ',
  `reply_msg_count` bigint COMMENT '客服回复消息数',
  `total_msg_count` bigint COMMENT '消息总数',
  `msg_count` bigint COMMENT '客户发送消息数',
  `comment` string COMMENT '备注',
  `finish_reason` string COMMENT '结束类型',
  `finish_user` string COMMENT '结束坐席',
  `end_time` string COMMENT '会话结束时间',
  `platform_description` string COMMENT '客户平台信息',
  `browser_name` string COMMENT '浏览器名称',
  `os_info` string COMMENT '系统名称',
  `area` string COMMENT '区域',
  `country` string COMMENT '所在国家',
  `province` string COMMENT '省',
  `city` string COMMENT '城市',
  `creator` bigint COMMENT '创建人',
  `name` string COMMENT '客户姓名',
  `idcard` string COMMENT '身份证号',
  `phone` string COMMENT '手机号',
  `itcast_school_id` bigint COMMENT '校区Id',
  `itcast_school` string COMMENT '校区',
  `itcast_subject_id` bigint COMMENT '学科Id',
  `itcast_subject` string COMMENT '学科',
  `wechat` string COMMENT '微信',
  `qq` string COMMENT 'qq号',
  `email` string COMMENT '邮箱',
  `gender` string COMMENT '性别',
  `level` string COMMENT '客户级别',
  `origin_type` string COMMENT '数据来源渠道',
  `information_way` string COMMENT '资讯方式',
  `working_years` string COMMENT '开始工作时间',
  `technical_directions` string COMMENT '技术方向',
  `customer_state` string COMMENT '当前客户状态',
  `valid` bigint COMMENT '该线索是否是网资有效线索',
  `anticipat_signup_date` string COMMENT '预计报名时间',
  `clue_state` string COMMENT '线索状态',
  `scrm_department_id` bigint COMMENT 'SCRM内部部门id',
  `superior_url` string COMMENT '诸葛获取上级页面URL',
  `superior_source` string COMMENT '诸葛获取上级页面URL标题',
  `landing_url` string COMMENT '诸葛获取着陆页面URL',
  `landing_source` string COMMENT '诸葛获取着陆页面URL来源',
  `info_url` string COMMENT '诸葛获取留咨页URL',
  `info_source` string COMMENT '诸葛获取留咨页URL标题',
  `origin_channel` string COMMENT '投放渠道',
  `course_id` bigint,
  `course_name` string ,
  `zhuge_session_id` string,
  `is_repeat` bigint COMMENT '是否重复线索(手机号维度) 0:正常 1:重复',
  `tenant` bigint COMMENT '租户id',
  `activity_id` string COMMENT '活动id',
  `activity_name` string COMMENT '活动名称',
  `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
  `shunt_mode_id` bigint COMMENT '匹配到的技能组id',
  `shunt_employee_group_id` bigint COMMENT '所属分流员工组'
) COMMENT '客户线索表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');



3,,edu_ods层建立ods_employee
drop table edu_ods.ods_employee;
create table ods_employee
(
    id                  bigint ,
    email               string comment '公司邮箱,OA登录账号',
    real_name           string comment '员工的真实姓名',
    phone               string comment '手机号,目前还没有使用;隐私问题OA接口没有提供这个属性,',
    department_id       string comment 'OA中的部门编号,有负值',
    department_name     string comment 'OA中的部门名',
    remote_login        bigint      comment '员工是否可以远程登录',
    job_number          string comment '员工工号',
    cross_school        bigint      comment '是否有跨校区权限',
    last_login_date     string comment '最后登录日期',
    creator             bigint  comment '创建人',
    create_date_time    string comment '创建时间',
    update_date_time    string comment '最后更新时间',
    deleted             bigint comment '是否被删除(禁用)',
    scrm_department_id  bigint  comment 'SCRM内部部门id',
    leave_office        bigint comment '离职状态',
    leave_office_time   string comment '离职时间',
    reinstated_time     string comment '复职时间',
    superior_leaders_id bigint comment '上级领导ID',
    tdepart_id          bigint comment '直属部门',
    tenant              bigint,
    ems_user_name       string
)comment '员工信息表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');

4,,edu_ods层建立ods_scrm_department
drop table edu_ods.`ods_scrm_department`;
CREATE TABLE edu_ods.`ods_scrm_department` (
  `id` bigint COMMENT '部门id',
  `name` string COMMENT '部门名称',
  `parent_id` bigint COMMENT '父部门id',
  `create_date_time` string COMMENT '创建时间',
  `update_date_time` string COMMENT '更新时间',
  `deleted` bigint COMMENT '删除标志',
  `id_path` string COMMENT '编码全路径',
  `tdepart_code` bigint COMMENT '直属部门',
  `creator` string COMMENT '创建者',
  `depart_level` bigint COMMENT '部门层级',
  `depart_sign` bigint COMMENT '部门标志,暂时默认1',
  `depart_line` bigint COMMENT '业务线,存储业务线编码',
  `depart_sort` bigint COMMENT '排序字段',
  `disable_flag` bigint COMMENT '禁用标志',
  `tenant` bigint
) comment '部门表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');


5,edu_ods层建立ods_customer_appeal
drop table edu_ods.`ods_customer_appeal`;
CREATE TABLE edu_ods.`ods_customer_appeal` (
  `id` bigint COMMENT '主键',
  `customer_relationship_first_id` bigint COMMENT '第一条客户关系id',
  `employee_id` bigint COMMENT '申诉人',
  `employee_name` string COMMENT '申诉人姓名',
  `employee_department_id` bigint COMMENT '申诉人部门',
  `employee_tdepart_id` bigint COMMENT '申诉人所属部门',
  `appeal_status` bigint COMMENT '申诉状态,0:待稽核 1:无效 2:有效',
  `audit_id` bigint COMMENT '稽核人id',
  `audit_name` string COMMENT '稽核人姓名',
  `audit_department_id` bigint COMMENT '稽核人所在部门',
  `audit_department_name` string COMMENT '稽核人部门名称',
  `audit_date_time` string COMMENT '稽核时间',
  `create_date_time` string COMMENT '创建时间(申诉时间)',
  `update_date_time` string COMMENT '更新时间',
  `deleted` bigint COMMENT '删除标志位',
  `tenant` bigint 
) comment '线索申诉表'
partitioned by (dt string)
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='ZLIB');



6,edu_ods层建立ods_itcast_clazz
CREATE TABLE edu_ods.`ods_itcast_clazz` (
  `id` string     COMMENT 'ems课程id(非自增)',
  `create_date_time` string        COMMENT '创建时间',
  `update_date_time` string      COMMENT '最后更新时间',
  `deleted` string        COMMENT '是否被删除(禁用)',
  `itcast_school_id` string      COMMENT 'ems校区ID',
  `itcast_school_name` string      COMMENT 'ems校区名称',
  `itcast_subject_id` string      COMMENT 'ems学科ID',
  `itcast_subject_name` string      COMMENT 'ems学科名称',
  `itcast_brand` string       COMMENT 'ems品牌',
  `clazz_type_state` string       COMMENT '班级类型状态',
  `clazz_type_name` string      COMMENT '班级类型名称',
  `teaching_mode` string       COMMENT '授课模式',
  `start_time` string    COMMENT '开班时间',
  `end_time` string    COMMENT '毕业时间',
  `comment` string      COMMENT '备注',
  `detail` string       COMMENT '详情(比如:27期)',
  `uncertain` string    COMMENT '待定班(0:否,1:是)',
  `tenant` string 
  )comment '报名课程表'
partitioned by (dt string)
 row format delimited fields terminated by '\t' 
stored as orc tblproperties ('orc.compress'='ZLIB');

数据从mysql导入ods层语句

sql 复制代码
将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query "SELECT  * ,${dateStr} as dt FROM edu_scrm.customer_${tableMonth}  
WHERE wce.create_time BETWEEN '${dateStr} 00:00:00' AND '${dateStr} 23:59:59'  and \$CONDITIONS" \
--hcatalog-database edu_ods \
--hcatalog-table ods_customer \
-m 1


将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_appeal
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from customer_appeal  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_customer_appeal \
--m 1;


将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_clue
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from customer_clue  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_customer_clue  \
--m 1;


将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_relationship
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from customer_relationship  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_customer_relationship  \
--m 1;


将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的employee
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from employee  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_employee  \
--m 1;

将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_clazz
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_clazz  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_itcast_clazz  \
--m 1;

将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_school
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_school  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_itcast_school  \
--m 1;

将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_subject
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from itcast_subject  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_itcast_subject\
--m 1;

将数据从mysql中导入到hive中的edu_ods 数据库下的scrm_department
/usr/bin/sqoop import --connect jdbc:mysql://192.168.88.80:3306/edu_scrm  \
--username root \
--password 123456 \
--query 'select *, "2023-12-07" as dt from scrm_department  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_scrm_department \
--m 1;

全量脚本将数据从mysql导入hive的ods层的全量脚本

sql 复制代码
export SQOOP_HOME=/usr/bin/sqoop


dateStr=`date -d '-1 day' +'%Y-%m-%d'`   ###获取昨天的日期,用于增量导入
tableMonth=`date -d '-1 day' +'%Y_%m'`   ###获取当年的年月,用于导入数据的业务数据库的表名称

jdbcUrl='jdbc:mysql://106.75.33.59:3306/edu_scrm?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true'
username='itcast_edu_stu'
password='itcast_edu_stu'



### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_appeal
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from customer_appeal  where 1=1 and $CONDITIONS' and dt=dateStr  \
--hcatalog-database edu_ods  \
--hcatalog-table ods_customer_appeal \
--m 1;


### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_clue
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from customer_clue  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_customer_clue  \
--m 1;


### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的customer_relationship
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from customer_relationship  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_customer_relationship  \
--m 1;


### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的employee
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from employee  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_employee  \
--m 1;

### 将数据从mysql中导入到hive中的edu_scrm_ods 数据库下的itcast_clazz
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from itcast_clazz  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_itcast_clazz  \
--m 1;


### 将数据从mysql中导入到hive中的edu_ods 数据库下的scrm_department
/usr/bin/sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect ${jdbcUrl} \
--username ${username} \
--password ${password} \
--query 'select *, "2023-12-07" as dt from scrm_department  where 1=1 and $CONDITIONS'   \
--hcatalog-database edu_ods  \
--hcatalog-table ods_scrm_department \
--m 1;

dwd层维度表事实表拉链表建表语句

sql 复制代码
1,创建DWD层的表dim_customer_appeal

drop table edu_dwd.dim_customer_appeal;
CREATE TABLE edu_dwd.`dim_customer_appeal` (
   `id` int,
  `customer_relationship_first_id` int   COMMENT '第一条客户关系id',
  `employee_id` int    COMMENT '申诉人',
  `employee_name` string     COMMENT '申诉人姓名',
  `employee_department_id` int    COMMENT '申诉人部门',
  `employee_tdepart_id` int    COMMENT '申诉人所属部门',
  `appeal_status` int     COMMENT '申诉状态,0:待稽核 1:无效 2:有效',
  `audit_id` int    COMMENT '稽核人id',
  `audit_name` string  COMMENT '稽核人姓名',
  `audit_department_id` int    COMMENT '稽核人所在部门',
  `audit_department_name` string COMMENT '稽核人部门名称',
  `audit_date_time` string    COMMENT '稽核时间',
  `create_date_time` string  COMMENT '创建时间(申诉时间)',
  `update_date_time` string  COMMENT '更新时间',
  `deleted` int  COMMENT '删除标志位',
  `tenant` int ,
`end_date` string COMMENT '失效日期',
    start_date string COMMENT '开始日期'
  ) comment '线索申诉维度拉链表'
  partitioned by (dt string)
clustered by (customer_relationship_first_id) into 6 buckets
  row format delimited fields terminated by '\t' 
stored as orc tblproperties ('orc.compress'='SNAPPY')

relationship,clue,appeal,itcast_clazz,employee,department


2,创建DWD层的表dim_itcast_clazz
drop table edu_dwd.dim_itcast_clazz;
CREATE TABLE edu_dwd.`dim_itcast_clazz` (
  `id` int     COMMENT 'ems课程id(非自增)',
  `create_date_time` string        COMMENT '创建时间',
  `update_date_time` string      COMMENT '最后更新时间',
  `deleted` int        COMMENT '是否被删除(禁用)',
  `itcast_school_id` string      COMMENT 'ems校区ID',
  `itcast_school_name` string      COMMENT 'ems校区名称',
  `itcast_subject_id` string      COMMENT 'ems学科ID',
  `itcast_subject_name` string      COMMENT 'ems学科名称',
  `itcast_brand` string       COMMENT 'ems品牌',
  `clazz_type_state` string       COMMENT '班级类型状态',
  `clazz_type_name` string      COMMENT '班级类型名称',
  `teaching_mode` string       COMMENT '授课模式',
  `start_time` string    COMMENT '开班时间',
  `end_time` string    COMMENT '毕业时间',
  `comment` string      COMMENT '备注',
  `detail` string       COMMENT '详情(比如:27期)',
  `uncertain` int    COMMENT '待定班(0:否,1:是)',
  `tenant` int ,
  `end_date` string COMMENT '失效日期',
    start_date string COMMENT '开始日期',
	dt string 
  )comment '报名课程表'
partitioned by (year string,month string,day string)
clustered by (id) into 3 buckets
 row format delimited fields terminated by '\t' 
stored as orc tblproperties ('orc.compress'='SNAPPY');


3,创建DWD层的表dim_scrm_department

CREATE TABLE edu_dwd.`dim_scrm_department` (
  `id` bigint COMMENT '部门id',
  `name` string COMMENT '部门名称',
  `parent_id` bigint COMMENT '父部门id',
  `create_date_time` string COMMENT '创建时间',
  `update_date_time` string COMMENT '更新时间',
  `deleted` bigint COMMENT '删除标志',
  `id_path` string COMMENT '编码全路径',
  `tdepart_code` bigint COMMENT '直属部门',
  `creator` string COMMENT '创建者',
  `depart_level` bigint COMMENT '部门层级',
  `depart_sign` bigint COMMENT '部门标志,暂时默认1',
  `depart_line` bigint COMMENT '业务线,存储业务线编码',
  `depart_sort` bigint COMMENT '排序字段',
  `disable_flag` bigint COMMENT '禁用标志',
  `tenant` bigint,
  dt string 
) comment '部门表'
partitioned by (yeat string,month string ,day string)
clustered by (id) into 3 buckets 
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');



4,创建了DWD层的表edu_dwd.dim_employee
create table edu_dwd.dim_employee
(
    id                  bigint ,
    email               string comment '公司邮箱,OA登录账号',
    real_name           string comment '员工的真实姓名',
    phone               string comment '手机号,目前还没有使用;隐私问题OA接口没有提供这个属性,',
    department_id       string comment 'OA中的部门编号,有负值',
    department_name     string comment 'OA中的部门名',
    remote_login        bigint      comment '员工是否可以远程登录',
    job_number          string comment '员工工号',
    cross_school        bigint      comment '是否有跨校区权限',
    last_login_date     string comment '最后登录日期',
    creator             bigint  comment '创建人',
    create_date_time    string comment '创建时间',
    update_date_time    string comment '最后更新时间',
    deleted             bigint comment '是否被删除(禁用)',
    scrm_department_id  bigint  comment 'SCRM内部部门id',
    leave_office        bigint comment '离职状态',
    leave_office_time   string comment '离职时间',
    reinstated_time     string comment '复职时间',
    superior_leaders_id bigint comment '上级领导ID',
    tdepart_id          bigint comment '直属部门',
    tenant              bigint,
    ems_user_name       string
)comment '员工信息表'
partitioned by (dt string)
clustered by (id) into 3 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');


5,创建DWD层的表ods_customer_clue
drop table edu_dwd.ods_customer_clue;
CREATE TABLE edu_dwd.ods_customer_clue (
  `id` bigint ,
  `create_date_time` string COMMENT '创建时间',
  `update_date_time` string COMMENT '最后更新时间',
  `deleted` bigint COMMENT '是否被删除(禁用)',
  `customer_id` bigint COMMENT '客户id',
  `customer_relationship_id` bigint COMMENT '客户关系id',
  `session_id` string COMMENT '七陌会话id',
  `sid` string COMMENT '访客id',
  `status` string COMMENT '状态(undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转)',
  `user` string COMMENT '所属坐席',
  `create_time` string COMMENT '七陌创建时间',
  `platform` string COMMENT '平台来源 (pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询)',
  `s_name` string COMMENT '用户名称',
  `seo_source` string COMMENT '搜索来源',
  `seo_keywords` string COMMENT '关键字',
  `ip` string COMMENT 'IP地址',
  `referrer` string COMMENT '上级来源页面',
  `from_url` string COMMENT '会话来源页面',
  `landing_page_url` string COMMENT '访客着陆页面',
  `url_title` string COMMENT '咨询页面title',
  `to_peer` string COMMENT '所属技能组',
  `manual_time` string COMMENT '人工开始时间',
  `begin_time` string COMMENT '坐席领取时间 ',
  `reply_msg_count` bigint COMMENT '客服回复消息数',
  `total_msg_count` bigint COMMENT '消息总数',
  `msg_count` bigint COMMENT '客户发送消息数',
  `comment` string COMMENT '备注',
  `finish_reason` string COMMENT '结束类型',
  `finish_user` string COMMENT '结束坐席',
  `end_time` string COMMENT '会话结束时间',
  `platform_description` string COMMENT '客户平台信息',
  `browser_name` string COMMENT '浏览器名称',
  `os_info` string COMMENT '系统名称',
  `area` string COMMENT '区域',
  `country` string COMMENT '所在国家',
  `province` string COMMENT '省',
  `city` string COMMENT '城市',
  `creator` bigint COMMENT '创建人',
  `name` string COMMENT '客户姓名',
  `idcard` string COMMENT '身份证号',
  `phone` string COMMENT '手机号',
  `itcast_school_id` bigint COMMENT '校区Id',
  `itcast_school` string COMMENT '校区',
  `itcast_subject_id` bigint COMMENT '学科Id',
  `itcast_subject` string COMMENT '学科',
  `wechat` string COMMENT '微信',
  `qq` string COMMENT 'qq号',
  `email` string COMMENT '邮箱',
  `gender` string COMMENT '性别',
  `level` string COMMENT '客户级别',
  `origin_type` string COMMENT '数据来源渠道',
  `information_way` string COMMENT '资讯方式',
  `working_years` string COMMENT '开始工作时间',
  `technical_directions` string COMMENT '技术方向',
  `customer_state` string COMMENT '当前客户状态',
  `valid` bigint COMMENT '该线索是否是网资有效线索',
  `anticipat_signup_date` string COMMENT '预计报名时间',
  `clue_state` string COMMENT '线索状态',
  `scrm_department_id` bigint COMMENT 'SCRM内部部门id',
  `superior_url` string COMMENT '诸葛获取上级页面URL',
  `superior_source` string COMMENT '诸葛获取上级页面URL标题',
  `landing_url` string COMMENT '诸葛获取着陆页面URL',
  `landing_source` string COMMENT '诸葛获取着陆页面URL来源',
  `info_url` string COMMENT '诸葛获取留咨页URL',
  `info_source` string COMMENT '诸葛获取留咨页URL标题',
  `origin_channel` string COMMENT '投放渠道',
  `course_id` bigint,
  `course_name` string ,
  `zhuge_session_id` string,
  `is_repeat` bigint COMMENT '是否重复线索(手机号维度) 0:正常 1:重复',
  `tenant` bigint COMMENT '租户id',
  `activity_id` string COMMENT '活动id',
  `activity_name` string COMMENT '活动名称',
  `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
  `shunt_mode_id` bigint COMMENT '匹配到的技能组id',
  `shunt_employee_group_id` bigint COMMENT '所属分流员工组',
  `end_date` string COMMENT '失效日期',
    start_date string COMMENT '开始日期'
	dt string
) COMMENT '客户线索表'
partitioned by (year string,month string  ,day string)
clustered by (customer_relationship_id) into 6 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');


6,创建DWD层的表fact_customer_relationship
drop table edu_ods.ods_customer_relationship;
CREATE TABLE  edu_dwd.fact_customer_relationship(
  `id` bigint ,
  `create_date_time` string ,
  `update_date_time` string COMMENT '最后更新时间',
  `deleted` bigint COMMENT '是否被删除(禁用)',
  `customer_id` bigint COMMENT '所属客户id',
  `first_id` bigint COMMENT '第一条客户关系id',
  `belonger` bigint COMMENT '归属人',
  `belonger_name` string COMMENT '归属人姓名',
  `initial_belonger` bigint COMMENT '初始归属人',
  `distribution_handler` bigint COMMENT '分配处理人',
  `business_scrm_department_id` bigint COMMENT '归属部门',
  `last_visit_time` string COMMENT '最后回访时间',
  `next_visit_time` string COMMENT '下次回访时间',
  `origin_type` string COMMENT '数据来源',
  `itcast_school_id` bigint COMMENT '校区Id',
  `itcast_subject_id` bigint COMMENT '学科Id',
  `intention_study_type` string COMMENT '意向学习方式',
  `anticipat_signup_date` string COMMENT '预计报名时间',
  `level` string COMMENT '客户级别',
  `creator` bigint COMMENT '创建人',
  `current_creator` bigint COMMENT '当前创建人:初始==创建人,当在公海拉回时为 拉回人',
  `creator_name` string COMMENT '创建者姓名',
  `origin_channel` string COMMENT '来源渠道',
  `comment` string COMMENT '备注',
  `first_customer_clue_id` bigint COMMENT '第一条线索id',
  `last_customer_clue_id` bigint COMMENT '最后一条线索id',
  `process_state` string COMMENT '处理状态',
  `process_time` string COMMENT '处理状态变动时间',
  `payment_state` string COMMENT '支付状态',
  `payment_time` string COMMENT '支付状态变动时间',
  `signup_state` string COMMENT '报名状态',
  `signup_time` string COMMENT '报名时间',
  `notice_state` string COMMENT '通知状态',
  `notice_time` string COMMENT '通知状态变动时间',
  `lock_state` bigint COMMENT '锁定状态',
  `lock_time` string COMMENT '锁定状态修改时间',
  `itcast_clazz_id` bigint COMMENT '所属ems班级id',
  `itcast_clazz_time` string COMMENT '报班时间',
  `payment_url` string COMMENT '付款链接',
  `payment_url_time` string COMMENT '支付链接生成时间',
  `ems_student_id` bigint COMMENT 'ems的学生id',
  `delete_reason` string COMMENT '删除原因',
  `deleter` bigint COMMENT '删除人',
  `deleter_name` string COMMENT '删除人姓名',
  `delete_time` string COMMENT '删除时间',
  `course_id` bigint COMMENT '课程ID',
  `course_name` string COMMENT '课程名称',
  `delete_comment` string COMMENT '删除原因说明',
  `close_state` string COMMENT '关闭装填',
  `close_time` string COMMENT '关闭状态变动时间',
  `appeal_id` bigint COMMENT '申诉id',
  `tenant` bigint COMMENT '租户',
  `total_fee` decimal(19,0) COMMENT '报名费总金额',
  `belonged` bigint COMMENT '小周期归属人',
  `belonged_time` string COMMENT '归属时间',
  `belonger_time` string COMMENT '归属时间',
  `transfer` bigint COMMENT '转移人',
  `transfer_time` string COMMENT '转移时间',
  `follow_type` bigint COMMENT '分配类型,0-自动分配,1-手动分配,2-自动转移,3-手动单个转移,4-手动批量转移,5-公海领取',
  `transfer_bxg_oa_account` string COMMENT '转移到博学谷归属人OA账号',
  `transfer_bxg_belonger_name` string COMMENT '转移到博学谷归属人OA姓名'
) COMMENT '客户意向表'
partitioned by (year string,month string ,day string)
clustered by (id) into 3 buckets
row format delimited fields terminated by '\t' stored as orc tblproperties ('orc.compress'='SNAPPY');

数据从ods层导入dwd层语句

sql 复制代码
1,将ods层的数据插入dwd层的表dim_customer_appeal(拉链表)
insert overwrite table edu_dwd.dim_customer_appeal   PARTITION (dt)
select 
  `id` ,
  `customer_relationship_first_id`   ,
  `employee_id` ,
  `employee_name`   ,
  `employee_department_id`,
  `employee_tdepart_id`,
  `appeal_status`,
  `audit_id`,
  `audit_name`,
  `audit_department_id` ,
  `audit_department_name` ,
  `audit_date_time` ,
  `create_date_time`,
  `update_date_time`,
  `deleted`,
  `tenant` ,
'9999-99-99' end_date,
update_date_time as start_date,
dt
from edu_ods.ods_customer_appeal


2,,将ods层的数据插入dwd层的表edu_dwd.`dim_itcast_clazz`(拉链表)
insert overwrite table edu_dwd.`dim_itcast_clazz`  partition(year,month ,day)
select 
id
,create_date_time
,update_date_time
,deleted
,itcast_school_id
,itcast_school_name
,itcast_subject_id
,itcast_subject_name
,itcast_brand
,clazz_type_state
,clazz_type_name
,teaching_mode
,start_time
,end_time
,comment
,detail
,uncertain
,tenant
,'9999-99-99' end_date
,update_date_time as start_date
,dt string
,year(create_date_time)
,month(create_date_time)
,day(create_date_time)
from 
edu_ods.ods_itcast_clazz

3,将ods层的数据插入dwd层的表dim_scrm_department

insert overwrite table edu_dwd.dim_scrm_department  partition(year,month,day)
select 
id
,name
,parent_id
,create_date_time
,update_date_time
,deleted
,id_path
,tdepart_code
,creator
,depart_level
,depart_sign
,depart_line
,depart_sort
,disable_flag
,tenant
,dt
,year(create_date_time) as year
,month(create_date_time) as month 
,day(create_date_time) as day
from edu_ods.ods_scrm_department


4,将ods层的数据插入dwd层的表edu_dwd.dim_employee
insert overwrite table edu_dwd.dim_employee  partition(dt)
select 
,id
,email
,real_name
,phone
,department_id
,department_name
,remote_login
,job_number
,cross_school
,last_login_date
,creator
,create_date_time
,update_date_time
,deleted
,scrm_department_id
,leave_office
,leave_office_time
,reinstated_time
,superior_leaders_id
,tdepart_id
,tenant
,ems_user_name
,dt
from deu_ods.ods_employee



5,将ods层的数据插入dwd层的表clue(拉链表)
insert overwrite table edu_dwd.fact_customer_clue partition(year,month,day)
select 
id
,create_date_time
,update_date_time
,deleted
,customer_id
,customer_relationship_id
,session_id
,sid
,status
,`user`
,create_time
,platform
,s_name
,seo_source
,seo_keywords
,ip
,referrer
,from_url
,landing_page_url
,url_title
,to_peer
,manual_time
,begin_time
,reply_msg_count
,total_msg_count
,msg_count
,comment
,finish_reason
,finish_user
,end_time
,platform_description
,browser_name
,os_info
,area
,country
,province
,city
,creator
,name
,idcard
,phone
,itcast_school_id
,itcast_school
,itcast_subject_id
,itcast_subject
,wechat
,qq
,email
,gender
,`level`
,origin_type
,information_way
,working_years
,technical_directions
,customer_state
,valid
,anticipat_signup_date
,clue_state
,scrm_department_id
,superior_url
,superior_source
,landing_url
,landing_source
,info_url
,info_source
,origin_channel
,course_id
,course_name
,zhuge_session_id
,is_repeat
,tenant
,activity_id
,activity_name
,follow_type
,shunt_mode_id
,shunt_employee_group_id
,'9999-99-99' end_date
,update_date_time as start_date,
dt,
year(create_date_time) as year,
month(create_date_time) as month,
day(create_date_time) as day
from edu_ods.ods_customer_clue




6,将ods层的数据插入dwd层的表realtionship(拉链表)

insert overwrite table edu_dwd.fact_customer_relationship  partition(year,month,day)
select 
id
,create_date_time
,update_date_time
,deleted
,customer_id
,first_id
,belonger
,belonger_name
,initial_belonger
,distribution_handler
,business_scrm_department_id
,last_visit_time
,next_visit_time
,origin_type
,itcast_school_id
,itcast_subject_id
,intention_study_type
,anticipat_signup_date
,level
,creator
,current_creator
,creator_name
,origin_channel
,comment
,first_customer_clue_id
,last_customer_clue_id
,process_state
,process_time
,payment_state
,payment_time
,signup_state
,signup_time
,notice_state
,notice_time
,lock_state
,lock_time
,itcast_clazz_id
,itcast_clazz_time
,payment_url
,payment_url_time
,ems_student_id
,delete_reason
,deleter
,deleter_name
,delete_time
,course_id
,course_name
,delete_comment
,close_state
,close_time
,appeal_id
,tenant
,total_fee
,belonged
,belonged_time
,belonger_time
,transfer
,transfer_time
,follow_type 
,transfer_bxg_oa_account
,transfer_bxg_belonger_name
,'9999-99-99' end_date
,update_date_time as start_date,
	year(create_date_time) as year,
	month(create_date_time) as month,
	day(create_date_time) as day
	from edu_ods.ods_customer_relationship
相关推荐
PersistJiao1 小时前
在 Spark RDD 中,sortBy 和 top 算子的各自适用场景
大数据·spark·top·sortby
2301_811274311 小时前
大数据基于Spring Boot的化妆品推荐系统的设计与实现
大数据·spring boot·后端
Yz98761 小时前
hive的存储格式
大数据·数据库·数据仓库·hive·hadoop·数据库开发
青云交1 小时前
大数据新视界 -- 大数据大厂之 Hive 数据导入:多源数据集成的策略与实战(上)(3/ 30)
大数据·数据清洗·电商数据·数据整合·hive 数据导入·多源数据·影视娱乐数据
武子康1 小时前
大数据-230 离线数仓 - ODS层的构建 Hive处理 UDF 与 SerDe 处理 与 当前总结
java·大数据·数据仓库·hive·hadoop·sql·hdfs
武子康1 小时前
大数据-231 离线数仓 - DWS 层、ADS 层的创建 Hive 执行脚本
java·大数据·数据仓库·hive·hadoop·mysql
时差9532 小时前
Flink Standalone集群模式安装部署
大数据·分布式·flink·部署
锵锵锵锵~蒋2 小时前
实时数据开发 | 怎么通俗理解Flink容错机制,提到的checkpoint、barrier、Savepoint、sink都是什么
大数据·数据仓库·flink·实时数据开发
二进制_博客2 小时前
Flink学习连载文章4-flink中的各种转换操作
大数据·学习·flink
大数据编程之光2 小时前
Flink入门介绍
大数据·flink