1.基本语法
NVL(表达式1,表达式2) 如果表达式1为空值,NVL返回值为表达式2的值,否则返回表达式1的值。 该函数的目的是把一个空值(null)转换成一个实际的值。其表达式的值可以是数字型、字符型和日期型。但是表达式1和表达式2的数据类型必须为同一个类型。2.案例实操
select nvl(1,0); 1 hive (gmall)> select nvl(null,"hello"); hello1.取当前天的下一个周一
select next_day('2020-06-14','MO'); 2020-06-15说明:星期一到星期日的英文(Monday,Tuesday、Wednesday、Thursday、Friday、Saturday、Sunday)
2.取当前周的周一
select date_add(next_day('2020-06-14','MO'),-7); 2020-06-81.map结构数据定义
map<string,string>2.array结构数据定义
array<string>3.struct结构数据定义
struct<id:int,name:string,age:int>4.struct和array嵌套定义
array<struct<id:int,name:string,age:int>>每日设备行为,主要按照设备id统计
1.建表语句
create external table dws_uv_detail_daycount ( `mid_id` string COMMENT '设备id', `brand` string COMMENT '手机品牌', `model` string COMMENT '手机型号', `login_count` bigint COMMENT '活跃次数', `page_stats` array<struct<page_id:string,page_count:bigint>> COMMENT '页面访问统计' ) COMMENT '每日设备行为表' partitioned by(dt string) stored as parquet location '/warehouse/gmall/dws/dws_uv_detail_daycount' tblproperties ("parquet.compression"="lzo");2.数据装载
with tmp_start as ( select mid_id, brand, model, count(*) login_count from dwd_start_log where dt='2020-06-14' group by mid_id,brand,model ), tmp_page as ( select mid_id, brand, model, collect_set(named_struct('page_id',page_id,'page_count',page_count)) page_stats from ( select mid_id, brand, model, page_id, count(*) page_count from dwd_page_log where dt='2020-06-14' group by mid_id,brand,model,page_id )tmp group by mid_id,brand,model ) insert overwrite table dws_uv_detail_daycount partition(dt='2020-06-14') select nvl(tmp_start.mid_id,tmp_page.mid_id), nvl(tmp_start.brand,tmp_page.brand), nvl(tmp_start.model,tmp_page.model), tmp_start.login_count, tmp_page.page_stats from tmp_start full outer join tmp_page on tmp_start.mid_id=tmp_page.mid_id and tmp_start.brand=tmp_page.brand and tmp_start.model=tmp_page.model;1.建表语句
create external table dws_user_action_daycount ( user_id string comment '用户 id', login_count bigint comment '登录次数', cart_count bigint comment '加入购物车次数', order_count bigint comment '下单次数', order_amount decimal(16,2) comment '下单金额', payment_count bigint comment '支付次数', payment_amount decimal(16,2) comment '支付金额', order_detail_stats array<struct<sku_id:string,sku_num:bigint,order_count:bigint,order_amount:decimal(20,2)>> comment '下单明细统计' ) COMMENT '每日会员行为' PARTITIONED BY (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_user_action_daycount/' tblproperties ("parquet.compression"="lzo");2.数据装载
with tmp_login as ( select user_id, count(*) login_count from dwd_start_log where dt='2020-06-14' and user_id is not null group by user_id ), tmp_cart as ( select user_id, count(*) cart_count from dwd_action_log where dt='2020-06-14' and user_id is not null and action_id='cart_add' group by user_id ),tmp_order as ( select user_id, count(*) order_count, sum(final_total_amount) order_amount from dwd_fact_order_info where dt='2020-06-14' group by user_id ) , tmp_payment as ( select user_id, count(*) payment_count, sum(payment_amount) payment_amount from dwd_fact_payment_info where dt='2020-06-14' group by user_id ), tmp_order_detail as ( select user_id, collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',order_count,'order_amount',order_amount)) order_stats from ( select user_id, sku_id, sum(sku_num) sku_num, count(*) order_count, cast(sum(final_amount_d) as decimal(20,2)) order_amount from dwd_fact_order_detail where dt='2020-06-14' group by user_id,sku_id )tmp group by user_id ) insert overwrite table dws_user_action_daycount partition(dt='2020-06-14') select tmp_login.user_id, login_count, nvl(cart_count,0), nvl(order_count,0), nvl(order_amount,0.0), nvl(payment_count,0), nvl(payment_amount,0.0), order_stats from tmp_login left join tmp_cart on tmp_login.user_id=tmp_cart.user_id left join tmp_order on tmp_login.user_id=tmp_order.user_id left join tmp_payment on tmp_login.user_id=tmp_payment.user_id left join tmp_order_detail on tmp_login.user_id=tmp_order_detail.user_id; 3)1.建表语句
create external table dws_sku_action_daycount ( sku_id string comment 'sku_id', order_count bigint comment '被下单次数', order_num bigint comment '被下单件数', order_amount decimal(16,2) comment '被下单金额', payment_count bigint comment '被支付次数', payment_num bigint comment '被支付件数', payment_amount decimal(16,2) comment '被支付金额', refund_count bigint comment '被退款次数', refund_num bigint comment '被退款件数', refund_amount decimal(16,2) comment '被退款金额', cart_count bigint comment '被加入购物车次数', favor_count bigint comment '被收藏次数', appraise_good_count bigint comment '好评数', appraise_mid_count bigint comment '中评数', appraise_bad_count bigint comment '差评数', appraise_default_count bigint comment '默认评价数' ) COMMENT '每日商品行为' PARTITIONED BY (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_sku_action_daycount/' tblproperties ("parquet.compression"="lzo");2.数据装载
注意:如果是23点59下单,支付日期跨天。需要从订单详情里面取出支付时间是今天,且订单时间是昨天或者今天的订单。
with tmp_order as ( select sku_id, count(*) order_count, sum(sku_num) order_num, sum(final_amount_d) order_amount from dwd_fact_order_detail where dt='2020-06-14' group by sku_id ), tmp_payment as ( select sku_id, count(*) payment_count, sum(sku_num) payment_num, sum(final_amount_d) payment_amount from dwd_fact_order_detail where (dt='2020-06-14' or dt=date_add('2020-06-14',-1)) and order_id in ( select order_id from dwd_fact_payment_info where dt='2020-06-14' ) group by sku_id ), tmp_refund as ( select sku_id, count(*) refund_count, sum(refund_num) refund_num, sum(refund_amount) refund_amount from dwd_fact_order_refund_info where dt='2020-06-14' group by sku_id ), tmp_cart as ( select item sku_id, count(*) cart_count from dwd_action_log where dt='2020-06-14' and user_id is not null and action_id='cart_add' group by item ), tmp_favor as ( select item sku_id, count(*) favor_count from dwd_action_log where dt='2020-06-14' and user_id is not null and action_id='favor_add' group by item ), tmp_appraise as ( select sku_id, sum(if(appraise='1201',1,0)) appraise_good_count, sum(if(appraise='1202',1,0)) appraise_mid_count, sum(if(appraise='1203',1,0)) appraise_bad_count, sum(if(appraise='1204',1,0)) appraise_default_count from dwd_fact_comment_info where dt='2020-06-14' group by sku_id ) insert overwrite table dws_sku_action_daycount partition(dt='2020-06-14') select sku_id, sum(order_count), sum(order_num), sum(order_amount), sum(payment_count), sum(payment_num), sum(payment_amount), sum(refund_count), sum(refund_num), sum(refund_amount), sum(cart_count), sum(favor_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count) from ( select sku_id, order_count, order_num, order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_order union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, payment_count, payment_num, payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_payment union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, refund_count, refund_num, refund_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_refund union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_cart union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_favor union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, 0 favor_count, appraise_good_count, appraise_mid_count, appraise_bad_count, appraise_default_count from tmp_appraise )tmp group by sku_id;1.建表语句
create external table dws_activity_info_daycount( `id` string COMMENT '编号', `activity_name` string COMMENT '活动名称', `activity_type` string COMMENT '活动类型', `start_time` string COMMENT '开始时间', `end_time` string COMMENT '结束时间', `create_time` string COMMENT '创建时间', `display_count` bigint COMMENT '曝光次数', `order_count` bigint COMMENT '下单次数', `order_amount` decimal(20,2) COMMENT '下单金额', `payment_count` bigint COMMENT '支付次数', `payment_amount` decimal(20,2) COMMENT '支付金额' ) COMMENT '每日活动统计' PARTITIONED BY (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_activity_info_daycount/' tblproperties ("parquet.compression"="lzo");2.数据装载
with tmp_op as ( select activity_id, sum(if(date_format(create_time,'yyyy-MM-dd')='2020-06-14',1,0)) order_count, sum(if(date_format(create_time,'yyyy-MM-dd')='2020-06-14',final_total_amount,0)) order_amount, sum(if(date_format(payment_time,'yyyy-MM-dd')='2020-06-14',1,0)) payment_count, sum(if(date_format(payment_time,'yyyy-MM-dd')='2020-06-14',final_total_amount,0)) payment_amount from dwd_fact_order_info where (dt='2020-06-14' or dt=date_add('2020-06-14',-1)) and activity_id is not null group by activity_id ), tmp_display as ( select item activity_id, count(*) display_count from dwd_display_log where dt='2020-06-14' and item_type='activity_id' group by item ), tmp_activity as ( select * from dwd_dim_activity_info where dt='2020-06-14' ) insert overwrite table dws_activity_info_daycount partition(dt='2020-06-14') select nvl(tmp_op.activity_id,tmp_display.activity_id), tmp_activity.activity_name, tmp_activity.activity_type, tmp_activity.start_time, tmp_activity.end_time, tmp_activity.create_time, tmp_display.display_count, tmp_op.order_count, tmp_op.order_amount, tmp_op.payment_count, tmp_op.payment_amount from tmp_op full outer join tmp_display on tmp_op.activity_id=tmp_display.activity_id left join tmp_activity on nvl(tmp_op.activity_id,tmp_display.activity_id)=tmp_activity.id;1.建表语句
create external table dws_area_stats_daycount( `id` bigint COMMENT '编号', `province_name` string COMMENT '省份名称', `area_code` string COMMENT '地区编码', `iso_code` string COMMENT 'iso编码', `region_id` string COMMENT '地区ID', `region_name` string COMMENT '地区名称', `login_count` string COMMENT '活跃设备数', `order_count` bigint COMMENT '下单次数', `order_amount` decimal(20,2) COMMENT '下单金额', `payment_count` bigint COMMENT '支付次数', `payment_amount` decimal(20,2) COMMENT '支付金额' ) COMMENT '每日地区统计表' PARTITIONED BY (`dt` string) stored as parquet location '/warehouse/gmall/dws/dws_area_stats_daycount/' tblproperties ("parquet.compression"="lzo");2.数据装载
with tmp_login as ( select area_code, count(*) login_count from dwd_start_log where dt='2020-06-14' group by area_code ), tmp_op as ( select province_id, sum(if(date_format(create_time,'yyyy-MM-dd')='2020-06-14',1,0)) order_count, sum(if(date_format(create_time,'yyyy-MM-dd')='2020-06-14',final_total_amount,0)) order_amount, sum(if(date_format(payment_time,'yyyy-MM-dd')='2020-06-14',1,0)) payment_count, sum(if(date_format(payment_time,'yyyy-MM-dd')='2020-06-14',final_total_amount,0)) payment_amount from dwd_fact_order_info where (dt='2020-06-14' or dt=date_add('2020-06-14',-1)) group by province_id ) insert overwrite table dws_area_stats_daycount partition(dt='2020-06-14') select pro.id, pro.province_name, pro.area_code, pro.iso_code, pro.region_id, pro.region_name, nvl(tmp_login.login_count,0), nvl(tmp_op.order_count,0), nvl(tmp_op.order_amount,0.0), nvl(tmp_op.payment_count,0), nvl(tmp_op.payment_amount,0.0) from dwd_dim_base_province pro left join tmp_login on pro.area_code=tmp_login.area_code left join tmp_op on pro.id=tmp_op.province_id;1.在/home/atguigu/bin目录下创建脚本dwd_to_dws.sh
vim dwd_to_dws.sh2.在脚本中填写如下内容
#!/bin/bash APP=gmall hive=/opt/module/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" set mapreduce.job.queuename=hive; with tmp_start as ( select mid_id, brand, model, count(*) login_count from ${APP}.dwd_start_log where dt='$do_date' group by mid_id,brand,model ), tmp_page as ( select mid_id, brand, model, collect_set(named_struct('page_id',page_id,'page_count',page_count)) page_stats from ( select mid_id, brand, model, page_id, count(*) page_count from ${APP}.dwd_page_log where dt='$do_date' group by mid_id,brand,model,page_id )tmp group by mid_id,brand,model ) insert overwrite table ${APP}.dws_uv_detail_daycount partition(dt='$do_date') select nvl(tmp_start.mid_id,tmp_page.mid_id), nvl(tmp_start.brand,tmp_page.brand), nvl(tmp_start.model,tmp_page.model), tmp_start.login_count, tmp_page.page_stats from tmp_start full outer join tmp_page on tmp_start.mid_id=tmp_page.mid_id and tmp_start.brand=tmp_page.brand and tmp_start.model=tmp_page.model; with tmp_login as ( select user_id, count(*) login_count from ${APP}.dwd_start_log where dt='$do_date' and user_id is not null group by user_id ), tmp_cart as ( select user_id, count(*) cart_count from ${APP}.dwd_action_log where dt='$do_date' and user_id is not null and action_id='cart_add' group by user_id ),tmp_order as ( select user_id, count(*) order_count, sum(final_total_amount) order_amount from ${APP}.dwd_fact_order_info where dt='$do_date' group by user_id ) , tmp_payment as ( select user_id, count(*) payment_count, sum(payment_amount) payment_amount from ${APP}.dwd_fact_payment_info where dt='$do_date' group by user_id ), tmp_order_detail as ( select user_id, collect_set(named_struct('sku_id',sku_id,'sku_num',sku_num,'order_count',order_count,'order_amount',order_amount)) order_stats from ( select user_id, sku_id, sum(sku_num) sku_num, count(*) order_count, cast(sum(final_amount_d) as decimal(20,2)) order_amount from ${APP}.dwd_fact_order_detail where dt='$do_date' group by user_id,sku_id )tmp group by user_id ) insert overwrite table ${APP}.dws_user_action_daycount partition(dt='$do_date') select tmp_login.user_id, login_count, nvl(cart_count,0), nvl(order_count,0), nvl(order_amount,0.0), nvl(payment_count,0), nvl(payment_amount,0.0), order_stats from tmp_login left outer join tmp_cart on tmp_login.user_id=tmp_cart.user_id left outer join tmp_order on tmp_login.user_id=tmp_order.user_id left outer join tmp_payment on tmp_login.user_id=tmp_payment.user_id left outer join tmp_order_detail on tmp_login.user_id=tmp_order_detail.user_id; with tmp_order as ( select sku_id, count(*) order_count, sum(sku_num) order_num, sum(final_amount_d) order_amount from ${APP}.dwd_fact_order_detail where dt='$do_date' group by sku_id ), tmp_payment as ( select sku_id, count(*) payment_count, sum(sku_num) payment_num, sum(final_amount_d) payment_amount from ${APP}.dwd_fact_order_detail where (dt='$do_date' or dt=date_add('$do_date',-1)) and order_id in ( select order_id from ${APP}.dwd_fact_payment_info where dt='$do_date' ) group by sku_id ), tmp_refund as ( select sku_id, count(*) refund_count, sum(refund_num) refund_num, sum(refund_amount) refund_amount from ${APP}.dwd_fact_order_refund_info where dt='$do_date' group by sku_id ), tmp_cart as ( select item sku_id, count(*) cart_count from ${APP}.dwd_action_log where dt='$do_date' and user_id is not null and action_id='cart_add' group by item ),tmp_favor as ( select item sku_id, count(*) favor_count from ${APP}.dwd_action_log where dt='$do_date' and user_id is not null and action_id='favor_add' group by item ), tmp_appraise as ( select sku_id, sum(if(appraise='1201',1,0)) appraise_good_count, sum(if(appraise='1202',1,0)) appraise_mid_count, sum(if(appraise='1203',1,0)) appraise_bad_count, sum(if(appraise='1204',1,0)) appraise_default_count from ${APP}.dwd_fact_comment_info where dt='$do_date' group by sku_id ) insert overwrite table ${APP}.dws_sku_action_daycount partition(dt='$do_date') select sku_id, sum(order_count), sum(order_num), sum(order_amount), sum(payment_count), sum(payment_num), sum(payment_amount), sum(refund_count), sum(refund_num), sum(refund_amount), sum(cart_count), sum(favor_count), sum(appraise_good_count), sum(appraise_mid_count), sum(appraise_bad_count), sum(appraise_default_count) from ( select sku_id, order_count, order_num, order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_order union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, payment_count, payment_num, payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_payment union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, refund_count, refund_num, refund_amount, 0 cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_refund union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, cart_count, 0 favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_cart union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, favor_count, 0 appraise_good_count, 0 appraise_mid_count, 0 appraise_bad_count, 0 appraise_default_count from tmp_favor union all select sku_id, 0 order_count, 0 order_num, 0 order_amount, 0 payment_count, 0 payment_num, 0 payment_amount, 0 refund_count, 0 refund_num, 0 refund_amount, 0 cart_count, 0 favor_count, appraise_good_count, appraise_mid_count, appraise_bad_count, appraise_default_count from tmp_appraise )tmp group by sku_id; with tmp_login as ( select area_code, count(*) login_count from ${APP}.dwd_start_log where dt='$do_date' group by area_code ), tmp_op as ( select province_id, sum(if(date_format(create_time,'yyyy-MM-dd')='$do_date',1,0)) order_count, sum(if(date_format(create_time,'yyyy-MM-dd')='$do_date',final_total_amount,0)) order_amount, sum(if(date_format(payment_time,'yyyy-MM-dd')='$do_date',1,0)) payment_count, sum(if(date_format(payment_time,'yyyy-MM-dd')='$do_date',final_total_amount,0)) payment_amount from ${APP}.dwd_fact_order_info where (dt='$do_date' or dt=date_add('$do_date',-1)) group by province_id ) insert overwrite table ${APP}.dws_area_stats_daycount partition(dt='$do_date') select pro.id, pro.province_name, pro.area_code, pro.iso_code, pro.region_id, pro.region_name, nvl(tmp_login.login_count,0), nvl(tmp_op.order_count,0), nvl(tmp_op.order_amount,0.0), nvl(tmp_op.payment_count,0), nvl(tmp_op.payment_amount,0.0) from ${APP}.dwd_dim_base_province pro left join tmp_login on pro.area_code=tmp_login.area_code left join tmp_op on pro.id=tmp_op.province_id; with tmp_op as ( select activity_id, sum(if(date_format(create_time,'yyyy-MM-dd')='$do_date',1,0)) order_count, sum(if(date_format(create_time,'yyyy-MM-dd')='$do_date',final_total_amount,0)) order_amount, sum(if(date_format(payment_time,'yyyy-MM-dd')='$do_date',1,0)) payment_count, sum(if(date_format(payment_time,'yyyy-MM-dd')='$do_date',final_total_amount,0)) payment_amount from ${APP}.dwd_fact_order_info where (dt='$do_date' or dt=date_add('$do_date',-1)) and activity_id is not null group by activity_id ), tmp_display as ( select item activity_id, count(*) display_count from ${APP}.dwd_display_log where dt='$do_date' and item_type='activity_id' group by item ), tmp_activity as ( select * from ${APP}.dwd_dim_activity_info where dt='$do_date' ) insert overwrite table ${APP}.dws_activity_info_daycount partition(dt='$do_date') select nvl(tmp_op.activity_id,tmp_display.activity_id), tmp_activity.activity_name, tmp_activity.activity_type, tmp_activity.start_time, tmp_activity.end_time, tmp_activity.create_time, tmp_display.display_count, tmp_op.order_count, tmp_op.order_amount, tmp_op.payment_count, tmp_op.payment_amount from tmp_op full outer join tmp_display on tmp_op.activity_id=tmp_display.activity_id left join tmp_activity on nvl(tmp_op.activity_id,tmp_display.activity_id)=tmp_activity.id; " $hive -e "$sql"增加脚本执行权限
chmod 777 dwd_to_dws.sh1.建表语句
create external table dwt_uv_topic ( `mid_id` string comment '设备id', `brand` string comment '手机品牌', `model` string comment '手机型号', `login_date_first` string comment '首次活跃时间', `login_date_last` string comment '末次活跃时间', `login_day_count` bigint comment '当日活跃次数', `login_count` bigint comment '累积活跃天数' ) COMMENT '设备主题宽表' stored as parquet location '/warehouse/gmall/dwt/dwt_uv_topic' tblproperties ("parquet.compression"="lzo");2.数据装载
insert overwrite table dwt_uv_topic select nvl(new.mid_id,old.mid_id), nvl(new.model,old.model), nvl(new.brand,old.brand), if(old.mid_id is null,'2020-06-14',old.login_date_first), if(new.mid_id is not null,'2020-06-14',old.login_date_last), if(new.mid_id is not null, new.login_count,0), nvl(old.login_count,0)+if(new.login_count>0,1,0) from ( select * from dwt_uv_topic )old full outer join ( select * from dws_uv_detail_daycount where dt='2020-06-14' )new on old.mid_id=new.mid_id;1.建表语句
create external table dwt_user_topic ( user_id string comment '用户id', login_date_first string comment '首次登录时间', login_date_last string comment '末次登录时间', login_count bigint comment '累积登录天数', login_last_30d_count bigint comment '最近30日登录天数', order_date_first string comment '首次下单时间', order_date_last string comment '末次下单时间', order_count bigint comment '累积下单次数', order_amount decimal(16,2) comment '累积下单金额', order_last_30d_count bigint comment '最近30日下单次数', order_last_30d_amount bigint comment '最近30日下单金额', payment_date_first string comment '首次支付时间', payment_date_last string comment '末次支付时间', payment_count decimal(16,2) comment '累积支付次数', payment_amount decimal(16,2) comment '累积支付金额', payment_last_30d_count decimal(16,2) comment '最近30日支付次数', payment_last_30d_amount decimal(16,2) comment '最近30日支付金额' )COMMENT '会员主题宽表' stored as parquet location '/warehouse/gmall/dwt/dwt_user_topic/' tblproperties ("parquet.compression"="lzo");2.数据装载
insert overwrite table dwt_user_topic select nvl(new.user_id,old.user_id), if(old.login_date_first is null and new.login_count>0,'2020-06-14',old.login_date_first), if(new.login_count>0,'2020-06-14',old.login_date_last), nvl(old.login_count,0)+if(new.login_count>0,1,0), nvl(new.login_last_30d_count,0), if(old.order_date_first is null and new.order_count>0,'2020-06-14',old.order_date_first), if(new.order_count>0,'2020-06-14',old.order_date_last), nvl(old.order_count,0)+nvl(new.order_count,0), nvl(old.order_amount,0)+nvl(new.order_amount,0), nvl(new.order_last_30d_count,0), nvl(new.order_last_30d_amount,0), if(old.payment_date_first is null and new.payment_count>0,'2020-06-14',old.payment_date_first), if(new.payment_count>0,'2020-06-14',old.payment_date_last), nvl(old.payment_count,0)+nvl(new.payment_count,0), nvl(old.payment_amount,0)+nvl(new.payment_amount,0), nvl(new.payment_last_30d_count,0), nvl(new.payment_last_30d_amount,0) from dwt_user_topic old full outer join ( select user_id, sum(if(dt='2020-06-14',login_count,0)) login_count, sum(if(dt='2020-06-14',order_count,0)) order_count, sum(if(dt='2020-06-14',order_amount,0)) order_amount, sum(if(dt='2020-06-14',payment_count,0)) payment_count, sum(if(dt='2020-06-14',payment_amount,0)) payment_amount, sum(if(login_count>0,1,0)) login_last_30d_count, sum(order_count) order_last_30d_count, sum(order_amount) order_last_30d_amount, sum(payment_count) payment_last_30d_count, sum(payment_amount) payment_last_30d_amount from dws_user_action_daycount where dt>=date_add( '2020-06-14',-30) group by user_id )new on old.user_id=new.user_id;1.建表语句
create external table dwt_sku_topic ( sku_id string comment 'sku_id', spu_id string comment 'spu_id', order_last_30d_count bigint comment '最近30日被下单次数', order_last_30d_num bigint comment '最近30日被下单件数', order_last_30d_amount decimal(16,2) comment '最近30日被下单金额', order_count bigint comment '累积被下单次数', order_num bigint comment '累积被下单件数', order_amount decimal(16,2) comment '累积被下单金额', payment_last_30d_count bigint comment '最近30日被支付次数', payment_last_30d_num bigint comment '最近30日被支付件数', payment_last_30d_amount decimal(16,2) comment '最近30日被支付金额', payment_count bigint comment '累积被支付次数', payment_num bigint comment '累积被支付件数', payment_amount decimal(16,2) comment '累积被支付金额', refund_last_30d_count bigint comment '最近三十日退款次数', refund_last_30d_num bigint comment '最近三十日退款件数', refund_last_30d_amount decimal(16,2) comment '最近三十日退款金额', refund_count bigint comment '累积退款次数', refund_num bigint comment '累积退款件数', refund_amount decimal(16,2) comment '累积退款金额', cart_last_30d_count bigint comment '最近30日被加入购物车次数', cart_count bigint comment '累积被加入购物车次数', favor_last_30d_count bigint comment '最近30日被收藏次数', favor_count bigint comment '累积被收藏次数', appraise_last_30d_good_count bigint comment '最近30日好评数', appraise_last_30d_mid_count bigint comment '最近30日中评数', appraise_last_30d_bad_count bigint comment '最近30日差评数', appraise_last_30d_default_count bigint comment '最近30日默认评价数', appraise_good_count bigint comment '累积好评数', appraise_mid_count bigint comment '累积中评数', appraise_bad_count bigint comment '累积差评数', appraise_default_count bigint comment '累积默认评价数' )COMMENT '商品主题宽表' stored as parquet location '/warehouse/gmall/dwt/dwt_sku_topic/' tblproperties ("parquet.compression"="lzo");2.数据装载
insert overwrite table dwt_sku_topic select nvl(new.sku_id,old.sku_id), sku_info.spu_id, nvl(new.order_count30,0), nvl(new.order_num30,0), nvl(new.order_amount30,0), nvl(old.order_count,0) + nvl(new.order_count,0), nvl(old.order_num,0) + nvl(new.order_num,0), nvl(old.order_amount,0) + nvl(new.order_amount,0), nvl(new.payment_count30,0), nvl(new.payment_num30,0), nvl(new.payment_amount30,0), nvl(old.payment_count,0) + nvl(new.payment_count,0), nvl(old.payment_num,0) + nvl(new.payment_num,0), nvl(old.payment_amount,0) + nvl(new.payment_amount,0), nvl(new.refund_count30,0), nvl(new.refund_num30,0), nvl(new.refund_amount30,0), nvl(old.refund_count,0) + nvl(new.refund_count,0), nvl(old.refund_num,0) + nvl(new.refund_num,0), nvl(old.refund_amount,0) + nvl(new.refund_amount,0), nvl(new.cart_count30,0), nvl(old.cart_count,0) + nvl(new.cart_count,0), nvl(new.favor_count30,0), nvl(old.favor_count,0) + nvl(new.favor_count,0), nvl(new.appraise_good_count30,0), nvl(new.appraise_mid_count30,0), nvl(new.appraise_bad_count30,0), nvl(new.appraise_default_count30,0) , nvl(old.appraise_good_count,0) + nvl(new.appraise_good_count,0), nvl(old.appraise_mid_count,0) + nvl(new.appraise_mid_count,0), nvl(old.appraise_bad_count,0) + nvl(new.appraise_bad_count,0), nvl(old.appraise_default_count,0) + nvl(new.appraise_default_count,0) from dwt_sku_topic old full outer join ( select sku_id, sum(if(dt='2020-06-14',order_count,0 )) order_count, sum(if(dt='2020-06-14',order_num ,0 )) order_num, sum(if(dt='2020-06-14',order_amount,0 )) order_amount , sum(if(dt='2020-06-14',payment_count,0 )) payment_count, sum(if(dt='2020-06-14',payment_num,0 )) payment_num, sum(if(dt='2020-06-14',payment_amount,0 )) payment_amount, sum(if(dt='2020-06-14',refund_count,0 )) refund_count, sum(if(dt='2020-06-14',refund_num,0 )) refund_num, sum(if(dt='2020-06-14',refund_amount,0 )) refund_amount, sum(if(dt='2020-06-14',cart_count,0 )) cart_count, sum(if(dt='2020-06-14',favor_count,0 )) favor_count, sum(if(dt='2020-06-14',appraise_good_count,0 )) appraise_good_count, sum(if(dt='2020-06-14',appraise_mid_count,0 ) ) appraise_mid_count , sum(if(dt='2020-06-14',appraise_bad_count,0 )) appraise_bad_count, sum(if(dt='2020-06-14',appraise_default_count,0 )) appraise_default_count, sum(order_count) order_count30 , sum(order_num) order_num30, sum(order_amount) order_amount30, sum(payment_count) payment_count30, sum(payment_num) payment_num30, sum(payment_amount) payment_amount30, sum(refund_count) refund_count30, sum(refund_num) refund_num30, sum(refund_amount) refund_amount30, sum(cart_count) cart_count30, sum(favor_count) favor_count30, sum(appraise_good_count) appraise_good_count30, sum(appraise_mid_count) appraise_mid_count30, sum(appraise_bad_count) appraise_bad_count30, sum(appraise_default_count) appraise_default_count30 from dws_sku_action_daycount where dt >= date_add ('2020-06-14', -30) group by sku_id )new on new.sku_id = old.sku_id left join (select * from dwd_dim_sku_info where dt='2020-06-14') sku_info on nvl(new.sku_id,old.sku_id)= sku_info.id;1.建表语句
create external table dwt_activity_topic( `id` string COMMENT '编号', `activity_name` string COMMENT '活动名称', `activity_type` string COMMENT '活动类型', `start_time` string COMMENT '开始时间', `end_time` string COMMENT '结束时间', `create_time` string COMMENT '创建时间', `display_day_count` bigint COMMENT '当日曝光次数', `order_day_count` bigint COMMENT '当日下单次数', `order_day_amount` decimal(20,2) COMMENT '当日下单金额', `payment_day_count` bigint COMMENT '当日支付次数', `payment_day_amount` decimal(20,2) COMMENT '当日支付金额', `display_count` bigint COMMENT '累积曝光次数', `order_count` bigint COMMENT '累积下单次数', `order_amount` decimal(20,2) COMMENT '累积下单金额', `payment_count` bigint COMMENT '累积支付次数', `payment_amount` decimal(20,2) COMMENT '累积支付金额' ) COMMENT '活动主题宽表' stored as parquet location '/warehouse/gmall/dwt/dwt_activity_topic/' tblproperties ("parquet.compression"="lzo");2.数据装载
insert overwrite table dwt_activity_topic select nvl(new.id,old.id), nvl(new.activity_name,old.activity_name), nvl(new.activity_type,old.activity_type), nvl(new.start_time,old.start_time), nvl(new.end_time,old.end_time), nvl(new.create_time,old.create_time), nvl(new.display_count,0), nvl(new.order_count,0), nvl(new.order_amount,0.0), nvl(new.payment_count,0), nvl(new.payment_amount,0.0), nvl(new.display_count,0)+nvl(old.display_count,0), nvl(new.order_count,0)+nvl(old.order_count,0), nvl(new.order_amount,0.0)+nvl(old.order_amount,0.0), nvl(new.payment_count,0)+nvl(old.payment_count,0), nvl(new.payment_amount,0.0)+nvl(old.payment_amount,0.0) from ( select * from dwt_activity_topic )old full outer join ( select * from dws_activity_info_daycount where dt='2020-06-14' )new on old.id=new.id;1.建表语句
create external table dwt_area_topic( `id` bigint COMMENT '编号', `province_name` string COMMENT '省份名称', `area_code` string COMMENT '地区编码', `iso_code` string COMMENT 'iso编码', `region_id` string COMMENT '地区ID', `region_name` string COMMENT '地区名称', `login_day_count` string COMMENT '当天活跃设备数', `login_last_30d_count` string COMMENT '最近30天活跃设备数', `order_day_count` bigint COMMENT '当天下单次数', `order_day_amount` decimal(16,2) COMMENT '当天下单金额', `order_last_30d_count` bigint COMMENT '最近30天下单次数', `order_last_30d_amount` decimal(16,2) COMMENT '最近30天下单金额', `payment_day_count` bigint COMMENT '当天支付次数', `payment_day_amount` decimal(16,2) COMMENT '当天支付金额', `payment_last_30d_count` bigint COMMENT '最近30天支付次数', `payment_last_30d_amount` decimal(16,2) COMMENT '最近30天支付金额' ) COMMENT '地区主题宽表' stored as parquet location '/warehouse/gmall/dwt/dwt_area_topic/' tblproperties ("parquet.compression"="lzo");2.数据装载
insert overwrite table dwt_area_topic select nvl(old.id,new.id), nvl(old.province_name,new.province_name), nvl(old.area_code,new.area_code), nvl(old.iso_code,new.iso_code), nvl(old.region_id,new.region_id), nvl(old.region_name,new.region_name), nvl(new.login_day_count,0), nvl(new.login_last_30d_count,0), nvl(new.order_day_count,0), nvl(new.order_day_amount,0.0), nvl(new.order_last_30d_count,0), nvl(new.order_last_30d_amount,0.0), nvl(new.payment_day_count,0), nvl(new.payment_day_amount,0.0), nvl(new.payment_last_30d_count,0), nvl(new.payment_last_30d_amount,0.0) from ( select * from dwt_area_topic )old full outer join ( select id, province_name, area_code, iso_code, region_id, region_name, sum(if(dt='2020-06-14',login_count,0)) login_day_count, sum(if(dt='2020-06-14',order_count,0)) order_day_count, sum(if(dt='2020-06-14',order_amount,0.0)) order_day_amount, sum(if(dt='2020-06-14',payment_count,0)) payment_day_count, sum(if(dt='2020-06-14',payment_amount,0.0)) payment_day_amount, sum(login_count) login_last_30d_count, sum(order_count) order_last_30d_count, sum(order_amount) order_last_30d_amount, sum(payment_count) payment_last_30d_count, sum(payment_amount) payment_last_30d_amount from dws_area_stats_daycount where dt>=date_add('2020-06-14',-30) group by id,province_name,area_code,iso_code,region_id,region_name )new on old.id=new.id;1.在/home/atguigu/bin目录下创建脚本dws_to_dwt.sh
vim dws_to_dwt.sh2.在脚本中填写如下内容
#!/bin/bash APP=gmall hive=/opt/module/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" set mapreduce.job.queuename=hive; insert overwrite table ${APP}.dwt_uv_topic select nvl(new.mid_id,old.mid_id), nvl(new.model,old.model), nvl(new.brand,old.brand), if(old.mid_id is null,'$do_date',old.login_date_first), if(new.mid_id is not null,'$do_date',old.login_date_last), if(new.mid_id is not null, new.login_count,0), nvl(old.login_count,0)+if(new.login_count>0,1,0) from ( select * from ${APP}.dwt_uv_topic )old full outer join ( select * from ${APP}.dws_uv_detail_daycount where dt='$do_date' )new on old.mid_id=new.mid_id; insert overwrite table ${APP}.dwt_user_topic select nvl(new.user_id,old.user_id), if(old.login_date_first is null and new.login_count>0,'$do_date',old.login_date_first), if(new.login_count>0,'$do_date',old.login_date_last), nvl(old.login_count,0)+if(new.login_count>0,1,0), nvl(new.login_last_30d_count,0), if(old.order_date_first is null and new.order_count>0,'$do_date',old.order_date_first), if(new.order_count>0,'$do_date',old.order_date_last), nvl(old.order_count,0)+nvl(new.order_count,0), nvl(old.order_amount,0)+nvl(new.order_amount,0), nvl(new.order_last_30d_count,0), nvl(new.order_last_30d_amount,0), if(old.payment_date_first is null and new.payment_count>0,'$do_date',old.payment_date_first), if(new.payment_count>0,'$do_date',old.payment_date_last), nvl(old.payment_count,0)+nvl(new.payment_count,0), nvl(old.payment_amount,0)+nvl(new.payment_amount,0), nvl(new.payment_last_30d_count,0), nvl(new.payment_last_30d_amount,0) from ${APP}.dwt_user_topic old full outer join ( select user_id, sum(if(dt='$do_date',login_count,0)) login_count, sum(if(dt='$do_date',order_count,0)) order_count, sum(if(dt='$do_date',order_amount,0)) order_amount, sum(if(dt='$do_date',payment_count,0)) payment_count, sum(if(dt='$do_date',payment_amount,0)) payment_amount, sum(if(login_count>0,1,0)) login_last_30d_count, sum(order_count) order_last_30d_count, sum(order_amount) order_last_30d_amount, sum(payment_count) payment_last_30d_count, sum(payment_amount) payment_last_30d_amount from ${APP}.dws_user_action_daycount where dt>=date_add( '$do_date',-30) group by user_id )new on old.user_id=new.user_id; insert overwrite table ${APP}.dwt_sku_topic select nvl(new.sku_id,old.sku_id), sku_info.spu_id, nvl(new.order_count30,0), nvl(new.order_num30,0), nvl(new.order_amount30,0), nvl(old.order_count,0) + nvl(new.order_count,0), nvl(old.order_num,0) + nvl(new.order_num,0), nvl(old.order_amount,0) + nvl(new.order_amount,0), nvl(new.payment_count30,0), nvl(new.payment_num30,0), nvl(new.payment_amount30,0), nvl(old.payment_count,0) + nvl(new.payment_count,0), nvl(old.payment_num,0) + nvl(new.payment_num,0), nvl(old.payment_amount,0) + nvl(new.payment_amount,0), nvl(new.refund_count30,0), nvl(new.refund_num30,0), nvl(new.refund_amount30,0), nvl(old.refund_count,0) + nvl(new.refund_count,0), nvl(old.refund_num,0) + nvl(new.refund_num,0), nvl(old.refund_amount,0) + nvl(new.refund_amount,0), nvl(new.cart_count30,0), nvl(old.cart_count,0) + nvl(new.cart_count,0), nvl(new.favor_count30,0), nvl(old.favor_count,0) + nvl(new.favor_count,0), nvl(new.appraise_good_count30,0), nvl(new.appraise_mid_count30,0), nvl(new.appraise_bad_count30,0), nvl(new.appraise_default_count30,0) , nvl(old.appraise_good_count,0) + nvl(new.appraise_good_count,0), nvl(old.appraise_mid_count,0) + nvl(new.appraise_mid_count,0), nvl(old.appraise_bad_count,0) + nvl(new.appraise_bad_count,0), nvl(old.appraise_default_count,0) + nvl(new.appraise_default_count,0) from ( select sku_id, spu_id, order_last_30d_count, order_last_30d_num, order_last_30d_amount, order_count, order_num, order_amount , payment_last_30d_count, payment_last_30d_num, payment_last_30d_amount, payment_count, payment_num, payment_amount, refund_last_30d_count, refund_last_30d_num, refund_last_30d_amount, refund_count, refund_num, refund_amount, cart_last_30d_count, cart_count, favor_last_30d_count, favor_count, appraise_last_30d_good_count, appraise_last_30d_mid_count, appraise_last_30d_bad_count, appraise_last_30d_default_count, appraise_good_count, appraise_mid_count, appraise_bad_count, appraise_default_count from ${APP}.dwt_sku_topic )old full outer join ( select sku_id, sum(if(dt='$do_date', order_count,0 )) order_count, sum(if(dt='$do_date',order_num ,0 )) order_num, sum(if(dt='$do_date',order_amount,0 )) order_amount , sum(if(dt='$do_date',payment_count,0 )) payment_count, sum(if(dt='$do_date',payment_num,0 )) payment_num, sum(if(dt='$do_date',payment_amount,0 )) payment_amount, sum(if(dt='$do_date',refund_count,0 )) refund_count, sum(if(dt='$do_date',refund_num,0 )) refund_num, sum(if(dt='$do_date',refund_amount,0 )) refund_amount, sum(if(dt='$do_date',cart_count,0 )) cart_count, sum(if(dt='$do_date',favor_count,0 )) favor_count, sum(if(dt='$do_date',appraise_good_count,0 )) appraise_good_count, sum(if(dt='$do_date',appraise_mid_count,0 ) ) appraise_mid_count , sum(if(dt='$do_date',appraise_bad_count,0 )) appraise_bad_count, sum(if(dt='$do_date',appraise_default_count,0 )) appraise_default_count, sum(order_count) order_count30 , sum(order_num) order_num30, sum(order_amount) order_amount30, sum(payment_count) payment_count30, sum(payment_num) payment_num30, sum(payment_amount) payment_amount30, sum(refund_count) refund_count30, sum(refund_num) refund_num30, sum(refund_amount) refund_amount30, sum(cart_count) cart_count30, sum(favor_count) favor_count30, sum(appraise_good_count) appraise_good_count30, sum(appraise_mid_count) appraise_mid_count30, sum(appraise_bad_count) appraise_bad_count30, sum(appraise_default_count) appraise_default_count30 from ${APP}.dws_sku_action_daycount where dt >= date_add ('$do_date', -30) group by sku_id )new on new.sku_id = old.sku_id left join (select * from ${APP}.dwd_dim_sku_info where dt='$do_date') sku_info on nvl(new.sku_id,old.sku_id)= sku_info.id; insert overwrite table ${APP}.dwt_activity_topic select nvl(new.id,old.id), nvl(new.activity_name,old.activity_name), nvl(new.activity_type,old.activity_type), nvl(new.start_time,old.start_time), nvl(new.end_time,old.end_time), nvl(new.create_time,old.create_time), nvl(new.display_count,0), nvl(new.order_count,0), nvl(new.order_amount,0.0), nvl(new.payment_count,0), nvl(new.payment_amount,0.0), nvl(new.display_count,0)+nvl(old.display_count,0), nvl(new.order_count,0)+nvl(old.order_count,0), nvl(new.order_amount,0.0)+nvl(old.order_amount,0.0), nvl(new.payment_count,0)+nvl(old.payment_count,0), nvl(new.payment_amount,0.0)+nvl(old.payment_amount,0.0) from ( select * from ${APP}.dwt_activity_topic )old full outer join ( select * from ${APP}.dws_activity_info_daycount where dt='$do_date' )new on old.id=new.id; insert overwrite table ${APP}.dwt_area_topic select nvl(old.id,new.id), nvl(old.province_name,new.province_name), nvl(old.area_code,new.area_code), nvl(old.iso_code,new.iso_code), nvl(old.region_id,new.region_id), nvl(old.region_name,new.region_name), nvl(new.login_day_count,0), nvl(new.login_last_30d_count,0), nvl(new.order_day_count,0), nvl(new.order_day_amount,0.0), nvl(new.order_last_30d_count,0), nvl(new.order_last_30d_amount,0.0), nvl(new.payment_day_count,0), nvl(new.payment_day_amount,0.0), nvl(new.payment_last_30d_count,0), nvl(new.payment_last_30d_amount,0.0) from ( select * from ${APP}.dwt_area_topic )old full outer join ( select id, province_name, area_code, iso_code, region_id, region_name, sum(if(dt='$do_date',login_count,0)) login_day_count, sum(if(dt='$do_date',order_count,0)) order_day_count, sum(if(dt='$do_date',order_amount,0.0)) order_day_amount, sum(if(dt='$do_date',payment_count,0)) payment_day_count, sum(if(dt='$do_date',payment_amount,0.0)) payment_day_amount, sum(login_count) login_last_30d_count, sum(order_count) order_last_30d_count, sum(order_amount) order_last_30d_amount, sum(payment_count) payment_last_30d_count, sum(payment_amount) payment_last_30d_amount from ${APP}.dws_area_stats_daycount where dt>=date_add('$do_date',-30) group by id,province_name,area_code,iso_code,region_id,region_name )new on old.id=new.id; " $hive -e "$sql"3.增加脚本执行权限
chmod 777 dws_to_dwt.sh1.重启行为数据通道
cluster.sh stop cluster.sh start2.修改/opt/module/applog下的application.properties
#业务日期 mock.date=2020-06-16注意:分发至其他需要生成数据的节点
xsync application.properties3.生成数据
lg.sh注意:生成数据之后,记得查看HDFS数据是否存在!
4.导入数据至ODS层
hdfs_to_ods_log.sh 2020-06-165.导入数据至DWD层
ods_to_dwd_log.sh 2020-06-166.导入数据至DWS层
dwd_to_dws.sh 2020-06-167.导入数据至DWT层
dws_to_dwt.sh 2020-06-168.修改application.properties
#业务日期 mock.date=2020-06-25重复前面3-7步即可!
1.建表语句
create external table ads_uv_count( `dt` string COMMENT '统计日期', `day_count` bigint COMMENT '当日用户数量', `wk_count` bigint COMMENT '当周用户数量', `mn_count` bigint COMMENT '当月用户数量', `is_weekend` string COMMENT 'Y,N是否是周末,用于得到本周最终结果', `is_monthend` string COMMENT 'Y,N是否是月末,用于得到本月最终结果' ) COMMENT '活跃设备数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_uv_count/';2.导入数据
insert into table ads_uv_count select '2020-06-14' dt, daycount.ct, wkcount.ct, mncount.ct, if(date_add(next_day('2020-06-14','MO'),-1)='2020-06-14','Y','N') , if(last_day('2020-06-14')='2020-06-14','Y','N') from ( select '2020-06-14' dt, count(*) ct from dwt_uv_topic where login_date_last='2020-06-14' )daycount join ( select '2020-06-14' dt, count (*) ct from dwt_uv_topic where login_date_last>=date_add(next_day('2020-06-14','MO'),-7) and login_date_last<= date_add(next_day('2020-06-14','MO'),-1) ) wkcount on daycount.dt=wkcount.dt join ( select '2020-06-14' dt, count (*) ct from dwt_uv_topic where date_format(login_date_last,'yyyy-MM')=date_format('2020-06-14','yyyy-MM') )mncount on daycount.dt=mncount.dt;建表语句
drop table if exists ads_new_mid_count; create external table ads_new_mid_count ( `create_date` string comment '创建时间' , `new_mid_count` BIGINT comment '新增设备数量' ) COMMENT '每日新增设备数量' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_new_mid_count/';导入数据
insert into table ads_new_mid_count select '2020-06-14', count(*) from dwt_uv_topic where login_date_first='2020-06-14';建表语句
create external table ads_user_retention_day_rate ( `stat_date` string comment '统计日期', `create_date` string comment '设备新增日期', `retention_day` int comment '截止当前日期留存天数', `retention_count` bigint comment '留存数量', `new_mid_count` bigint comment '设备新增数量', `retention_ratio` decimal(16,2) comment '留存率' ) COMMENT '留存率' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_retention_day_rate/';导入数据
insert into table ads_user_retention_day_rate select '2020-06-15', date_add('2020-06-15',-1), 1, --留存天数 sum(if(login_date_first=date_add('2020-06-15',-1) and login_date_last='2020-06-15',1,0)), sum(if(login_date_first=date_add('2020-06-15',-1),1,0)), sum(if(login_date_first=date_add('2020-06-15',-1) and login_date_last='2020-06-15',1,0))/sum(if(login_date_first=date_add('2020-06-15',-1),1,0))*100 from dwt_uv_topic union all select '2020-06-15', date_add('2020-06-15',-2), 2, sum(if(login_date_first=date_add('2020-06-15',-2) and login_date_last='2020-06-15',1,0)), sum(if(login_date_first=date_add('2020-06-15',-2),1,0)), sum(if(login_date_first=date_add('2020-06-15',-2) and login_date_last='2020-06-15',1,0))/sum(if(login_date_first=date_add('2020-06-15',-2),1,0))*100 from dwt_uv_topic union all select '2020-06-15', date_add('2020-06-15',-3), 3, sum(if(login_date_first=date_add('2020-06-15',-3) and login_date_last='2020-06-15',1,0)), sum(if(login_date_first=date_add('2020-06-15',-3),1,0)), sum(if(login_date_first=date_add('2020-06-15',-3) and login_date_last='2020-06-15',1,0))/sum(if(login_date_first=date_add('2020-06-15',-3),1,0))*100 from dwt_uv_topic;建表语句
create external table ads_silent_count( `dt` string COMMENT '统计日期', `silent_count` bigint COMMENT '沉默设备数' ) COMMENT '沉默用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_silent_count';导入数据
insert into table ads_silent_count select '2020-06-25', count(*) from dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add('2020-06-25',-7);建表语句
create external table ads_back_count( `dt` string COMMENT '统计日期', `wk_dt` string COMMENT '统计日期所在周', `wastage_count` bigint COMMENT '回流设备数' ) COMMENT '本周回流用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_back_count';导入数据
insert into table ads_back_count select '2020-06-25', concat(date_add(next_day('2020-06-25','MO'),-7),'_', date_add(next_day('2020-06-25','MO'),-1)), count(*) from ( select mid_id from dwt_uv_topic where login_date_last>=date_add(next_day('2020-06-25','MO'),-7) and login_date_last<= date_add(next_day('2020-06-25','MO'),-1) and login_date_first<date_add(next_day('2020-06-25','MO'),-7) )current_wk --本周活跃减去本周新增 left join ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-06-25','MO'),-7*2) and dt<= date_add(next_day('2020-06-25','MO'),-7-1) group by mid_id )last_wk --上周活跃 on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null;建表语句
create external table ads_wastage_count( `dt` string COMMENT '统计日期', `wastage_count` bigint COMMENT '流失设备数' ) COMMENT '流失用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_wastage_count';2.导入2020-06-25数据
insert into table ads_wastage_count select '2020-06-25', count(*) from ( select mid_id from dwt_uv_topic where login_date_last<=date_add('2020-06-25',-7) group by mid_id )t1;1.建表语句
create external table ads_continuity_wk_count( `dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日期', `wk_dt` string COMMENT '持续时间', `continuity_count` bigint COMMENT '活跃设备数' ) COMMENT '最近连续三周活跃用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_continuity_wk_count';2.导入2020-06-25所在周的数据
insert into table ads_continuity_wk_count select '2020-06-25', concat(date_add(next_day('2020-06-25','MO'),-7*3),'_',date_add(next_day('2020-06-25','MO'),-1)), count(*) from ( select mid_id from ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-06-25','monday'),-7) and dt<=date_add(next_day('2020-06-25','monday'),-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-06-25','monday'),-7*2) and dt<=date_add(next_day('2020-06-25','monday'),-7-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-06-25','monday'),-7*3) and dt<=date_add(next_day('2020-06-25','monday'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2;1.建表语句
create external table ads_continuity_uv_count( `dt` string COMMENT '统计日期', `wk_dt` string COMMENT '最近7天日期', `continuity_count` bigint ) COMMENT '最近七天内连续三天活跃用户数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_continuity_uv_count';2.写出导入数据的sql语句
insert into table ads_continuity_uv_count select '2020-06-16', concat(date_add('2020-06-16',-6),'_','2020-06-16'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from dws_uv_detail_daycount where dt>=date_add('2020-06-16',-6) and dt<='2020-06-16' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id --去重,防止一个设备有两段连续超过3天的日子 )t4;1.建表语句
create external table ads_user_topic( `dt` string COMMENT '统计日期', `day_users` string COMMENT '活跃会员数', `day_new_users` string COMMENT '新增会员数', `day_new_payment_users` string COMMENT '新增消费会员数', `payment_users` string COMMENT '总付费会员数', `users` string COMMENT '总会员数', `day_users2users` decimal(16,2) COMMENT '会员活跃率', `payment_users2users` decimal(16,2) COMMENT '会员付费率', `day_new_users2users` decimal(16,2) COMMENT '会员新鲜度' ) COMMENT '会员信息表' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_topic';2.导入数据
insert into table ads_user_topic select '2020-06-14', sum(if(login_date_last='2020-06-14',1,0)), sum(if(login_date_first='2020-06-14',1,0)), sum(if(payment_date_first='2020-06-14',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last='2020-06-14',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first='2020-06-14',1,0))/sum(if(login_date_last='2020-06-14',1,0)) from dwt_user_topic;1.建表语句
create external table ads_user_action_convert_day( `dt` string COMMENT '统计日期', `home_count` bigint COMMENT '浏览首页人数', `good_detail_count` bigint COMMENT '浏览商品详情页人数', `home2good_detail_convert_ratio` decimal(16,2) COMMENT '首页到商品详情转化率', `cart_count` bigint COMMENT '加入购物车的人数', `good_detail2cart_convert_ratio` decimal(16,2) COMMENT '商品详情页到加入购物车转化率', `order_count` bigint COMMENT '下单人数', `cart2order_convert_ratio` decimal(16,2) COMMENT '加入购物车到下单转化率', `payment_amount` bigint COMMENT '支付人数', `order2payment_convert_ratio` decimal(16,2) COMMENT '下单到支付的转化率' ) COMMENT '漏斗分析' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_action_convert_day/';2.数据装载
with tmp_uv as ( select '2020-06-14' dt, sum(if(page_id = 'home',1,0)) home_count, sum(if(page_id = 'good_detail',1,0)) good_detail_count from ( select mid_id, page_id from dwd_page_log where dt='2020-06-14' and page_id in ('home','good_detail') group by mid_id,page_id ) tmp ), tmp_cop as ( select '2020-06-14' dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from dws_user_action_daycount where dt='2020-06-14' ) insert into table ads_user_action_convert_day select tmp_uv.dt, tmp_uv.home_count, tmp_uv.good_detail_count, tmp_uv.good_detail_count/tmp_uv.home_count*100, tmp_cop.cart_count, tmp_cop.cart_count/tmp_uv.good_detail_count*100, tmp_cop.order_count, tmp_cop.order_count/tmp_cop.cart_count*100, tmp_cop.payment_count, tmp_cop.payment_count/tmp_cop.order_count*100 from tmp_uv join tmp_cop on tmp_uv.dt=tmp_cop.dt;1.建表语句
create external table ads_product_info( `dt` string COMMENT '统计日期', `sku_num` string COMMENT 'sku个数', `spu_num` string COMMENT 'spu个数' ) COMMENT '商品个数信息' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_info';2.数据导入
insert into table ads_product_info select '2020-06-14' dt, sku_num, spu_num from ( select '2020-06-14' dt, count(*) sku_num from dwt_sku_topic ) tmp_sku_num join ( select '2020-06-14' dt, count(*) spu_num from ( select spu_id from dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt;1.建表语句
create external table ads_product_sale_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `payment_amount` bigint COMMENT '销量金额' ) COMMENT '商品销量排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_sale_topN';2.导入数据
insert into table ads_product_sale_topN select '2020-06-14' dt, sku_id, payment_amount from dws_sku_action_daycount where dt='2020-06-14' order by payment_amount desc limit 10;1.建表语句
create external table ads_product_favor_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `favor_count` bigint COMMENT '收藏量' ) COMMENT '商品收藏排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_favor_topN';2.数据导入
insert into table ads_product_favor_topN select '2020-06-14' dt, sku_id, favor_count from dws_sku_action_daycount where dt='2020-06-14' order by favor_count desc limit 10;1.建表语句
create external table ads_product_cart_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `cart_count` bigint COMMENT '加入购物车次数' ) COMMENT '商品加入购物车排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_cart_topN';2.数据导入
insert into table ads_product_cart_topN select '2020-06-14' dt, sku_id, cart_count from dws_sku_action_daycount where dt='2020-06-14' order by cart_count desc limit 10;1.建表语句
create external table ads_product_refund_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `refund_ratio` decimal(16,2) COMMENT '退款率' ) COMMENT '商品退款率排名' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_refund_topN';2.导入数据
insert into table ads_product_refund_topN select '2020-06-14', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from dwt_sku_topic order by refund_ratio desc limit 10;1.建表语句
create external table ads_appraise_bad_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品ID', `appraise_bad_ratio` decimal(16,2) COMMENT '差评率' ) COMMENT '商品差评率' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_appraise_bad_topN';2.导入数据
insert into table ads_appraise_bad_topN select '2020-06-14' dt, sku_id, appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count) appraise_bad_ratio from dws_sku_action_daycount where dt='2020-06-14' order by appraise_bad_ratio desc limit 10;1.建表语句
create external table ads_order_daycount( dt string comment '统计日期', order_count bigint comment '单日下单笔数', order_amount bigint comment '单日下单金额', order_users bigint comment '单日下单用户数' ) comment '下单数目统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_order_daycount';2.数据导入
insert into table ads_order_daycount select '2020-06-14', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from dws_user_action_daycount where dt='2020-06-14';1.建表语句
create external table ads_payment_daycount( dt string comment '统计日期', order_count bigint comment '单日支付笔数', order_amount bigint comment '单日支付金额', payment_user_count bigint comment '单日支付人数', payment_sku_count bigint comment '单日支付商品数', payment_avg_time decimal(16,2) comment '下单到支付的平均时长,取分钟数' ) comment '支付信息统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_payment_daycount';2.数据导入
insert into table ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select '2020-06-14' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from dws_user_action_daycount where dt='2020-06-14' )tmp_payment join ( select '2020-06-14' dt, sum(if(payment_count>0,1,0)) payment_sku_count from dws_sku_action_daycount where dt='2020-06-14' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select '2020-06-14' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from dwd_fact_order_info where dt='2020-06-14' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt;1.建表语句
create external table ads_sale_tm_category1_stat_mn ( tm_id string comment '品牌id', category1_id string comment '1级品类id ', category1_name string comment '1级品类名称 ', buycount bigint comment '购买人数', buy_twice_last bigint comment '两次以上购买人数', buy_twice_last_ratio decimal(16,2) comment '单次复购率', buy_3times_last bigint comment '三次以上购买人数', buy_3times_last_ratio decimal(16,2) comment '多次复购率', stat_mn string comment '统计月份', stat_date string comment '统计日期' ) COMMENT '品牌复购率统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';2.数据导入
with tmp_order as ( select user_id, order_stats_struct.sku_id sku_id, sum(order_stats_struct.order_count) order_count from dws_user_action_daycount lateral view explode(order_detail_stats) tmp as order_stats_struct where date_format(dt,'yyyy-MM')=date_format('2020-06-14','yyyy-MM') group by user_id,order_stats_struct.sku_id ), tmp_sku as ( select id, tm_id, category1_id, category1_name from dwd_dim_sku_info where dt='2020-06-14' ) insert into table ads_sale_tm_category1_stat_mn select tm_id, category1_id, category1_name, sum(if(order_count>=1,1,0)) buycount, sum(if(order_count>=2,1,0)) buyTwiceLast, sum(if(order_count>=2,1,0))/sum( if(order_count>=1,1,0)) buyTwiceLastRatio, sum(if(order_count>=3,1,0)) buy3timeLast , sum(if(order_count>=3,1,0))/sum( if(order_count>=1,1,0)) buy3timeLastRatio , date_format('2020-06-14' ,'yyyy-MM') stat_mn, '2020-06-14' stat_date from ( select tmp_order.user_id, tmp_sku.category1_id, tmp_sku.category1_name, tmp_sku.tm_id, sum(order_count) order_count -- 一个用户有可能买一个品牌下的多个商品 from tmp_order join tmp_sku on tmp_order.sku_id=tmp_sku.id group by tmp_order.user_id,tmp_sku.category1_id,tmp_sku.category1_name,tmp_sku.tm_id )tmp group by tm_id, category1_id, category1_name;1.建表语句
create external table ads_area_topic( `dt` string COMMENT '统计日期', `id` bigint COMMENT '编号', `province_name` string COMMENT '省份名称', `area_code` string COMMENT '地区编码', `iso_code` string COMMENT 'iso编码', `region_id` string COMMENT '地区ID', `region_name` string COMMENT '地区名称', `login_day_count` bigint COMMENT '当天活跃设备数', `order_day_count` bigint COMMENT '当天下单次数', `order_day_amount` decimal(16,2) COMMENT '当天下单金额', `payment_day_count` bigint COMMENT '当天支付次数', `payment_day_amount` decimal(16,2) COMMENT '当天支付金额' ) COMMENT '地区主题信息' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_area_topic/';2.数据装载
insert into table ads_area_topic select '2020-06-14', id, province_name, area_code, iso_code, region_id, region_name, login_day_count, order_day_count, order_day_amount, payment_day_count, payment_day_amount from dwt_area_topic;1.在/home/atguigu/bin目录下创建脚本dwt_to_ads.sh
vim dwt_to_ads.sh2.在脚本中填写如下内容
#!/bin/bash hive=/opt/module/hive/bin/hive APP=gmall # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" set mapreduce.job.queuename=hive; insert into table ${APP}.ads_uv_count select '$do_date' dt, daycount.ct, wkcount.ct, mncount.ct, if(date_add(next_day('$do_date','MO'),-1)='$do_date','Y','N') , if(last_day('$do_date')='$do_date','Y','N') from ( select '$do_date' dt, count(*) ct from ${APP}.dwt_uv_topic where login_date_last='$do_date' )daycount join ( select '$do_date' dt, count (*) ct from ${APP}.dwt_uv_topic where login_date_last>=date_add(next_day('$do_date','MO'),-7) and login_date_last<= date_add(next_day('$do_date','MO'),-1) ) wkcount on daycount.dt=wkcount.dt join ( select '$do_date' dt, count (*) ct from ${APP}.dwt_uv_topic where date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-MM') )mncount on daycount.dt=mncount.dt; insert into table ${APP}.ads_new_mid_count select login_date_first, count(*) from ${APP}.dwt_uv_topic where login_date_first='$do_date' group by login_date_first; insert into table ${APP}.ads_silent_count select '$do_date', count(*) from ${APP}.dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add('$do_date',-7); insert into table ${APP}.ads_back_count select '$do_date', concat(date_add(next_day('$do_date','MO'),-7),'_', date_add(next_day('$do_date','MO'),-1)), count(*) from ( select mid_id from ${APP}.dwt_uv_topic where login_date_last>=date_add(next_day('$do_date','MO'),-7) and login_date_last<= date_add(next_day('$do_date','MO'),-1) and login_date_first<date_add(next_day('$do_date','MO'),-7) )current_wk left join ( select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','MO'),-7*2) and dt<= date_add(next_day('$do_date','MO'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null; insert into table ${APP}.ads_wastage_count select '$do_date', count(*) from ( select mid_id from ${APP}.dwt_uv_topic where login_date_last<=date_add('$do_date',-7) group by mid_id )t1; insert into table ${APP}.ads_user_retention_day_rate select '$do_date',--统计日期 date_add('$do_date',-1),--新增日期 1,--留存天数 sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0)),--$do_date的1日留存数 sum(if(login_date_first=date_add('$do_date',-1),1,0)),--$do_date新增 sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-1),1,0))*100 from ${APP}.dwt_uv_topic union all select '$do_date',--统计日期 date_add('$do_date',-2),--新增日期 2,--留存天数 sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0)),--$do_date的2日留存数 sum(if(login_date_first=date_add('$do_date',-2),1,0)),--$do_date新增 sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-2),1,0))*100 from ${APP}.dwt_uv_topic union all select '$do_date',--统计日期 date_add('$do_date',-3),--新增日期 3,--留存天数 sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0)),--$do_date的3日留存数 sum(if(login_date_first=date_add('$do_date',-3),1,0)),--$do_date新增 sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',-3),1,0))*100 from ${APP}.dwt_uv_topic; insert into table ${APP}.ads_continuity_wk_count select '$do_date', concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_date','MO'),-1)), count(*) from ( select mid_id from ( select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7) and dt<=date_add(next_day('$do_date','monday'),-1) group by mid_id union all select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7*2) and dt<=date_add(next_day('$do_date','monday'),-7-1) group by mid_id union all select mid_id from ${APP}.dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7*3) and dt<=date_add(next_day('$do_date','monday'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2; insert into table ${APP}.ads_continuity_uv_count select '$do_date', concat(date_add('$do_date',-6),'_','$do_date'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from ${APP}.dws_uv_detail_daycount where dt>=date_add('$do_date',-6) and dt<='$do_date' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4; insert into table ${APP}.ads_user_topic select '$do_date', sum(if(login_date_last='$do_date',1,0)), sum(if(login_date_first='$do_date',1,0)), sum(if(payment_date_first='$do_date',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last='$do_date',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,0)) from ${APP}.dwt_user_topic; with tmp_uv as ( select '$do_date' dt, sum(if(array_contains(pages,'home'),1,0)) home_count, sum(if(array_contains(pages,'good_detail'),1,0)) good_detail_count from ( select mid_id, collect_set(page_id) pages from ${APP}.dwd_page_log where dt='$do_date' and page_id in ('home','good_detail') group by mid_id )tmp ), tmp_cop as ( select '$do_date' dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from ${APP}.dws_user_action_daycount where dt='$do_date' ) insert into table ${APP}.ads_user_action_convert_day select tmp_uv.dt, tmp_uv.home_count, tmp_uv.good_detail_count, tmp_uv.good_detail_count/tmp_uv.home_count*100, tmp_cop.cart_count, tmp_cop.cart_count/tmp_uv.good_detail_count*100, tmp_cop.order_count, tmp_cop.order_count/tmp_cop.cart_count*100, tmp_cop.payment_count, tmp_cop.payment_count/tmp_cop.order_count*100 from tmp_uv join tmp_cop on tmp_uv.dt=tmp_cop.dt; insert into table ${APP}.ads_product_info select '$do_date' dt, sku_num, spu_num from ( select '$do_date' dt, count(*) sku_num from ${APP}.dwt_sku_topic ) tmp_sku_num join ( select '$do_date' dt, count(*) spu_num from ( select spu_id from ${APP}.dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt; insert into table ${APP}.ads_product_sale_topN select '$do_date' dt, sku_id, payment_amount from ${APP}.dws_sku_action_daycount where dt='$do_date' order by payment_amount desc limit 10; insert into table ${APP}.ads_product_favor_topN select '$do_date' dt, sku_id, favor_count from ${APP}.dws_sku_action_daycount where dt='$do_date' order by favor_count desc limit 10; insert into table ${APP}.ads_product_cart_topN select '$do_date' dt, sku_id, cart_count from ${APP}.dws_sku_action_daycount where dt='$do_date' order by cart_count desc limit 10; insert into table ${APP}.ads_product_refund_topN select '$do_date', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from ${APP}.dwt_sku_topic order by refund_ratio desc limit 10; insert into table ${APP}.ads_appraise_bad_topN select '$do_date' dt, sku_id, appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_count+appraise_default_count) appraise_bad_ratio from ${APP}.dws_sku_action_daycount where dt='$do_date' order by appraise_bad_ratio desc limit 10; insert into table ${APP}.ads_order_daycount select '$do_date', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from ${APP}.dws_user_action_daycount where dt='$do_date'; insert into table ${APP}.ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select '$do_date' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from ${APP}.dws_user_action_daycount where dt='$do_date' )tmp_payment join ( select '$do_date' dt, sum(if(payment_count>0,1,0)) payment_sku_count from ${APP}.dws_sku_action_daycount where dt='$do_date' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select '$do_date' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from ${APP}.dwd_fact_order_info where dt='$do_date' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt; with tmp_order as ( select user_id, order_stats_struct.sku_id sku_id, sum(order_stats_struct.order_count) order_count from ${APP}.dws_user_action_daycount lateral view explode(order_detail_stats) tmp as order_stats_struct where date_format(dt,'yyyy-MM')=date_format('$do_date','yyyy-MM') group by user_id,order_stats_struct.sku_id ), tmp_sku as ( select id, tm_id, category1_id, category1_name from ${APP}.dwd_dim_sku_info where dt='$do_date' ) insert into table ${APP}.ads_sale_tm_category1_stat_mn select tm_id, category1_id, category1_name, sum(if(order_count>=1,1,0)) buycount, sum(if(order_count>=2,1,0)) buyTwiceLast, sum(if(order_count>=2,1,0))/sum( if(order_count>=1,1,0)) buyTwiceLastRatio, sum(if(order_count>=3,1,0)) buy3timeLast , sum(if(order_count>=3,1,0))/sum( if(order_count>=1,1,0)) buy3timeLastRatio , date_format('$do_date' ,'yyyy-MM') stat_mn, '$do_date' stat_date from ( select tmp_order.user_id, tmp_sku.category1_id, tmp_sku.category1_name, tmp_sku.tm_id, sum(order_count) order_count from tmp_order join tmp_sku on tmp_order.sku_id=tmp_sku.id group by tmp_order.user_id,tmp_sku.category1_id,tmp_sku.category1_name,tmp_sku.tm_id )tmp group by tm_id, category1_id, category1_name; insert into table ${APP}.ads_area_topic select '$do_date', id, province_name, area_code, iso_code, region_id, region_name, login_day_count, order_day_count, order_day_amount, payment_day_count, payment_day_amount from ${APP}.dwt_area_topic; " $hive -e "$sql"3.增加脚本执行权限
chmod 777 dwt_to_ads.sh