文章目录
1. 表设计2. 数据示例3. 需求及数据说明4. sql实现5. sql拆分解析5.1 原始数据处理(子查询1)5.2 过滤多余重复数据(子查询2)5.3 生成报告
1. 表设计
由于我这边数据量比较大对数据进行了按用户分了100个区
CREATE TABLE `l_live_room
` (
`sso_id
` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '用户ID',
`erp_id
` varchar(64) NOT NULL DEFAULT '0',
`cid
` varchar(64) NOT NULL DEFAULT '0',
`package_id
` int(10) unsigned NOT NULL DEFAULT '0',
`unit_id
` int(10) unsigned NOT NULL DEFAULT '0',
`course_id
` int(10) unsigned DEFAULT '0',
`room_id
` varchar(30) NOT NULL,
`start_time
` int(10) unsigned NOT NULL,
`end_time
` int(10) unsigned NOT NULL,
`live_type
` smallint(5) unsigned NOT NULL DEFAULT '0',
`watch_time
` double unsigned NOT NULL DEFAULT '0' COMMENT '直播观看时长',
`current_position
` double unsigned NOT NULL DEFAULT '0' COMMENT '回放观看时长',
`total_length
` double unsigned NOT NULL DEFAULT '0',
`live_id
` int(10) unsigned NOT NULL DEFAULT '0',
`live_name
` varchar(255) DEFAULT NULL,
`interaction_type
` tinyint(3) unsigned NOT NULL DEFAULT '0' COMMENT '1:一对多 2:一对一 3 其他',
`status` tinyint(3) unsigned NOT NULL DEFAULT '0' COMMENT '1 观看直播, 2观看回放, 3观看了直播和回放',
`get_watch_time
` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '同步watch_time的时间',
`action_time
` varchar(19) NOT NULL COMMENT '用户最近一次操作时间',
`error
` int(10) unsigned NOT NULL DEFAULT '0' COMMENT '1 表示未获取到room_id, 2 表示未查询到',
`update_time
` datetime NOT NULL DEFAULT '2000-01-01 00:00:00' COMMENT '最后一次写入阿里云日志的时间',
PRIMARY KEY (`sso_id
`,`erp_id
`,`cid
`,`package_id
`,`unit_id
`) USING BTREE,
KEY `room_id
` (`room_id
`) USING BTREE,
KEY `status` (`status`,`live_type
`,`error
`,`get_watch_time
`,`update_time
`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT
=DYNAMIC
PARTITION BY HASH (sso_id
)
(PARTITION p0
ENGINE = InnoDB,
PARTITION p1
ENGINE = InnoDB,
PARTITION p98
ENGINE = InnoDB,
PARTITION p99
ENGINE = InnoDB);
2. 数据示例
3. 需求及数据说明
获取近20周的周报周报结果包含直播观看时长live_watch_time, 回放观看时长playback_duration, 有效学习的课程(直播或回放观看超过50%)course_finish_total, 本周学习的课程总数course_total, 直播总数total, 观看直播的数量live_attendance, 直播有效观看的数量live_effective_attendance及直播总时长live_total_time若观看直播则按直播开始时间start_time 进行分周, 若观看的是回放则按最后一次观看时间action_time进行分周若当前周观看了其它周的直播, 不计在当前周的直播总数中, 但记在本周的学习课程数中原始数据中若同一节课即观看了直播又观看了回放数据存放在一条记录中, 如何区分请查看status字段的说明(这样设计是为了便于统计学习的课程数, 因为在同一周中若即看了直播又看了回放只会算作学习了一节课)
4. sql实现
SELECT
start_time
, SUBDATE
(start_time
, -6) AS end_time
,
SUM(LEAST
(watch_time
, total_time
)) AS live_watch_time
,
SUM(current_position
) AS playback_duration
,
SUM(IF(status != 0 AND (watch_time
/ total_time
> 0.5 OR current_position
/ total_length
> 0.5), 1, 0)) AS course_finish_total
,
SUM(IF(status != 0, 1, 0)) AS course_total
,
SUM(IF(status != 2, 1, 0)) AS total
,
SUM(IF(status IN (1, 3), 1, 0)) AS live_attendance
,
SUM(IF(watch_time
/ total_time
> 0.5, 1, 0)) AS live_effective_attendance
,
SUM(total_time
) AS live_total_time
FROM(
SELECT
IF(status IN (0, 1) OR type=2, start_time
, start_time2
) AS start_time
,
IF(status=2 OR (status=3 AND type=1 AND start_time
!= start_time2
), 0, watch_time
) AS watch_time
,
IF(status=2 OR (status=3 AND type=1 AND start_time
!= start_time2
), 0, total_time
) AS total_time
,
IF(status=1 OR type=2, 0, current_position
) AS current_position
,
IF(status=1 OR type=2, 0, total_length
) AS total_length
,
IF(status=2 AND type=2, 0, status) AS status,
type
FROM(
SELECT
LEFT(SUBDATE
(FROM_UNIXTIME
(start_time
),IF(FROM_UNIXTIME
(start_time
,'%w')=0, 7, FROM_UNIXTIME
(start_time
,'%w'))-1), 10) AS start_time
,
LEFT(SUBDATE
(action_time
,IF(DATE_FORMAT
(action_time
,'%w')=0, 7, DATE_FORMAT
(action_time
,'%w'))-1), 10) AS start_time2
,
watch_time
, end_time
- start_time
AS total_time
, current_position
, total_length
, status, type
FROM
`l_live_room
`
LEFT JOIN
(SELECT 1 AS type UNION SELECT 2 AS type) AS a
ON (status IN (0, 1) AND type = 1) OR status IN (2, 3)
WHERE
sso_id
= 123123
AND error
!= 1
AND GREATEST
(FROM_UNIXTIME
(start_time
), action_time
) >= SUBDATE
(CURDATE
(),IF(DATE_FORMAT
(CURDATE
(),'%w')=0, 7, DATE_FORMAT
(CURDATE
(),'%w'))-1+133)
) AS t
WHERE NOT (start_time
= start_time2
AND type = 2)
) AS t
WHERE start_time
>= SUBDATE
(CURDATE
(),IF(DATE_FORMAT
(CURDATE
(),'%w')=0, 7, DATE_FORMAT
(CURDATE
(),'%w'))-1+133)
AND start_time
<= NOW()
GROUP BY start_time
;
5. sql拆分解析
5.1 原始数据处理(子查询1)
SELECT
LEFT(SUBDATE
(FROM_UNIXTIME
(start_time
),IF(FROM_UNIXTIME
(start_time
,'%w')=0, 7, FROM_UNIXTIME
(start_time
,'%w'))-1), 10) AS start_time
,
LEFT(SUBDATE
(action_time
,IF(DATE_FORMAT
(action_time
,'%w')=0, 7, DATE_FORMAT
(action_time
,'%w'))-1), 10) AS start_time2
,
watch_time
, end_time
- start_time
AS total_time
, current_position
, total_length
, status, type
FROM
`l_live_room
`
LEFT JOIN
(SELECT 1 AS type UNION SELECT 2 AS type) AS a
ON (status IN (0, 1) AND type = 1) OR status IN (2, 3)
WHERE
sso_id
= 123123
AND error
!= 1
AND GREATEST
(FROM_UNIXTIME
(start_time
), action_time
) >= SUBDATE
(CURDATE
(),IF(DATE_FORMAT
(CURDATE
(),'%w')=0, 7, DATE_FORMAT
(CURDATE
(),'%w'))-1+133)
5.2 过滤多余重复数据(子查询2)
SELECT
IF(status IN (0, 1) OR type=2, start_time
, start_time2
) AS start_time
,
IF(status=2 OR (status=3 AND type=1 AND start_time
!= start_time2
), 0, watch_time
) AS watch_time
,
IF(status=2 OR (status=3 AND type=1 AND start_time
!= start_time2
), 0, total_time
) AS total_time
,
IF(status=1 OR type=2, 0, current_position
) AS current_position
,
IF(status=1 OR type=2, 0, total_length
) AS total_length
,
IF(status=2 AND type=2, 0, status) AS status,
type
FROM( 子查询
1 ) AS t
WHERE NOT (start_time
= start_time2
AND type = 2)
5.3 生成报告
SELECT
start_time
, SUBDATE
(start_time
, -6) AS end_time
,
SUM(LEAST
(watch_time
, total_time
)) AS live_watch_time
,
SUM(current_position
) AS playback_duration
,
SUM(IF(status != 0 AND (watch_time
/ total_time
> 0.5 OR current_position
/ total_length
> 0.5), 1, 0)) AS course_finish_total
,
SUM(IF(status != 0, 1, 0)) AS course_total
,
SUM(IF(status != 2, 1, 0)) AS total
,
SUM(IF(status IN (1, 3), 1, 0)) AS live_attendance
,
SUM(IF(watch_time
/ total_time
> 0.5, 1, 0)) AS live_effective_attendance
,
SUM(total_time
) AS live_total_time
FROM( 子查询
2 ) AS t
WHERE start_time
>= SUBDATE
(CURDATE
(),IF(DATE_FORMAT
(CURDATE
(),'%w')=0, 7, DATE_FORMAT
(CURDATE
(),'%w'))-1+133)
AND start_time
<= NOW()
GROUP BY start_time