pet-project
This commit is contained in:
286
gp_pet_project.sql
Normal file
286
gp_pet_project.sql
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
-- ===============================================================
|
||||||
|
-- SCHEMAS
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
CREATE SCHEMA raw;
|
||||||
|
CREATE SCHEMA core;
|
||||||
|
CREATE SCHEMA dm;
|
||||||
|
|
||||||
|
|
||||||
|
-- ===============================================================
|
||||||
|
-- DIMENSIONS
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
-- dim_user
|
||||||
|
CREATE TABLE core.dim_user (
|
||||||
|
user_id INT PRIMARY KEY,
|
||||||
|
name TEXT,
|
||||||
|
age INT,
|
||||||
|
email TEXT,
|
||||||
|
registration_date DATE
|
||||||
|
)
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
INSERT INTO core.dim_user (user_id, name, age, email, registration_date)
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id::INT,
|
||||||
|
name::TEXT,
|
||||||
|
age::INT,
|
||||||
|
email::TEXT,
|
||||||
|
registration_date::DATE
|
||||||
|
FROM raw.users;
|
||||||
|
|
||||||
|
|
||||||
|
-- dim_course
|
||||||
|
CREATE TABLE core.dim_course (
|
||||||
|
course_id INT PRIMARY KEY,
|
||||||
|
title TEXT,
|
||||||
|
category TEXT
|
||||||
|
)
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
INSERT INTO core.dim_course (course_id, title, category)
|
||||||
|
SELECT DISTINCT
|
||||||
|
id::INT,
|
||||||
|
title::TEXT,
|
||||||
|
category::TEXT
|
||||||
|
FROM raw.courses;
|
||||||
|
|
||||||
|
|
||||||
|
-- dim_lesson
|
||||||
|
CREATE TABLE core.dim_lesson (
|
||||||
|
lesson_id INT PRIMARY KEY,
|
||||||
|
title TEXT,
|
||||||
|
duration_min INT,
|
||||||
|
course_id INT REFERENCES core.dim_course(course_id)
|
||||||
|
)
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
INSERT INTO core.dim_lesson (lesson_id, title, duration_min, course_id)
|
||||||
|
SELECT DISTINCT
|
||||||
|
id::INT,
|
||||||
|
title::TEXT,
|
||||||
|
duration_min::INT,
|
||||||
|
course_id::INT
|
||||||
|
FROM raw.lessons;
|
||||||
|
|
||||||
|
|
||||||
|
-- dim_date (optional)
|
||||||
|
CREATE TABLE core.dim_date (
|
||||||
|
date DATE PRIMARY KEY,
|
||||||
|
year INT,
|
||||||
|
month INT,
|
||||||
|
day INT,
|
||||||
|
weekday TEXT
|
||||||
|
)
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
|
||||||
|
-- ===============================================================
|
||||||
|
-- FACT TABLES
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
-- fact_lesson_views
|
||||||
|
CREATE TABLE core.fact_lesson_views (
|
||||||
|
user_id INT REFERENCES core.dim_user(user_id),
|
||||||
|
lesson_id INT REFERENCES core.dim_lesson(lesson_id),
|
||||||
|
course_id INT REFERENCES core.dim_course(course_id),
|
||||||
|
viewed_at TIMESTAMP
|
||||||
|
)
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
INSERT INTO core.fact_lesson_views (user_id, lesson_id, course_id, viewed_at)
|
||||||
|
SELECT DISTINCT
|
||||||
|
lv.user_id::INT,
|
||||||
|
lv.lesson_id::INT,
|
||||||
|
l.course_id::INT,
|
||||||
|
lv.viewed_at::TIMESTAMP
|
||||||
|
FROM raw.lesson_views lv
|
||||||
|
JOIN raw.lessons l ON l.id = lv.lesson_id;
|
||||||
|
|
||||||
|
|
||||||
|
-- enrollments
|
||||||
|
CREATE TABLE core.enrollments (
|
||||||
|
user_id INT REFERENCES core.dim_user(user_id),
|
||||||
|
course_id INT REFERENCES core.dim_course(course_id),
|
||||||
|
enrolled_at TIMESTAMP
|
||||||
|
)
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
INSERT INTO core.enrollments (user_id, course_id, enrolled_at)
|
||||||
|
SELECT DISTINCT
|
||||||
|
user_id::INT,
|
||||||
|
course_id::INT,
|
||||||
|
enrolled_at::TIMESTAMP
|
||||||
|
FROM raw.enrollments;
|
||||||
|
|
||||||
|
|
||||||
|
-- ===============================================================
|
||||||
|
-- VITRINA 7 — Lesson Popularity
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
/* ===============================================================
|
||||||
|
ЗАДАНИЕ 7: Витрина dm.lesson_popularity_summary
|
||||||
|
|
||||||
|
Цель:
|
||||||
|
Показать популярность каждого урока.
|
||||||
|
|
||||||
|
Поля витрины:
|
||||||
|
- lesson_id — идентификатор урока
|
||||||
|
- lesson_title — название урока
|
||||||
|
- course_id — идентификатор курса
|
||||||
|
- course_title — название курса
|
||||||
|
- total_views — общее число просмотров урока
|
||||||
|
- unique_users — количество уникальных пользователей
|
||||||
|
- first_view — дата первого просмотра
|
||||||
|
- last_view — дата последнего просмотра
|
||||||
|
|
||||||
|
Условия выполнения:
|
||||||
|
- JOIN fact_lesson_views, dim_lesson, dim_course
|
||||||
|
- Группировка по lesson_id, lesson_title, course_id, course_title
|
||||||
|
- Уникальные пользователи: COUNT(DISTINCT user_id)
|
||||||
|
=============================================================== */
|
||||||
|
|
||||||
|
CREATE MATERIALIZED VIEW dm.lesson_popularity_summary AS
|
||||||
|
SELECT
|
||||||
|
flv.lesson_id,
|
||||||
|
dl.title AS lesson_title,
|
||||||
|
flv.course_id,
|
||||||
|
dc.title AS course_title,
|
||||||
|
COUNT(*) AS total_views,
|
||||||
|
COUNT(DISTINCT flv.user_id) AS unique_users,
|
||||||
|
MIN(flv.viewed_at) AS first_view,
|
||||||
|
MAX(flv.viewed_at) AS last_view
|
||||||
|
FROM core.fact_lesson_views flv
|
||||||
|
LEFT JOIN core.dim_lesson dl ON dl.lesson_id = flv.lesson_id
|
||||||
|
LEFT JOIN core.dim_course dc ON dc.course_id = flv.course_id
|
||||||
|
GROUP BY flv.lesson_id, dl.title, flv.course_id, dc.title
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
|
||||||
|
-- ===============================================================
|
||||||
|
-- VITRINA — Inactive Users
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
/* ===============================================================
|
||||||
|
ЗАДАНИЕ: Найти пользователей, которые не просмотрели ни одного урока.
|
||||||
|
|
||||||
|
Цель:
|
||||||
|
Определить невовлечённых пользователей и потенциальный отток.
|
||||||
|
|
||||||
|
Поля витрины:
|
||||||
|
- user_id — идентификатор пользователя
|
||||||
|
- user_name — имя
|
||||||
|
- email — почта
|
||||||
|
- age — возраст
|
||||||
|
- enrollments_count — количество курсов
|
||||||
|
- registration_date — дата регистрации
|
||||||
|
|
||||||
|
Условия выполнения:
|
||||||
|
- Пользователи, отсутствующие в fact_lesson_views
|
||||||
|
- LEFT JOIN dim_user + enrollments
|
||||||
|
- Фильтрация через NOT EXISTS
|
||||||
|
=============================================================== */
|
||||||
|
|
||||||
|
CREATE INDEX idx_fact_lesson_views_user
|
||||||
|
ON core.fact_lesson_views (user_id);
|
||||||
|
|
||||||
|
CREATE INDEX idx_enrollments_user_course
|
||||||
|
ON core.enrollments (user_id, course_id);
|
||||||
|
|
||||||
|
CREATE MATERIALIZED VIEW dm.inactive_users_summary AS
|
||||||
|
SELECT
|
||||||
|
du.user_id,
|
||||||
|
du.name AS user_name,
|
||||||
|
du.email,
|
||||||
|
du.age,
|
||||||
|
COUNT(e.course_id) AS enrollments_count,
|
||||||
|
du.registration_date
|
||||||
|
FROM core.dim_user du
|
||||||
|
LEFT JOIN core.enrollments e ON e.user_id = du.user_id
|
||||||
|
WHERE NOT EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM core.fact_lesson_views fv
|
||||||
|
WHERE fv.user_id = du.user_id
|
||||||
|
)
|
||||||
|
GROUP BY du.user_id
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
|
||||||
|
-- ===============================================================
|
||||||
|
-- VITRINA 9 — Course Completion
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
/* ===============================================================
|
||||||
|
ЗАДАНИЕ 9: Витрина dm.course_completion_rate
|
||||||
|
|
||||||
|
Цель:
|
||||||
|
Анализировать, насколько пользователи проходят курсы до конца.
|
||||||
|
|
||||||
|
Поля витрины:
|
||||||
|
- user_id — идентификатор пользователя
|
||||||
|
- user_name — имя пользователя
|
||||||
|
- course_id — идентификатор курса
|
||||||
|
- course_title — название курса
|
||||||
|
- total_lessons — количество уроков в курсе
|
||||||
|
- lessons_viewed — число просмотренных уроков
|
||||||
|
- completion_rate — lessons_viewed / total_lessons
|
||||||
|
|
||||||
|
Условия выполнения:
|
||||||
|
- JOIN между dim_user, enrollments, dim_lesson, fact_lesson_views
|
||||||
|
- Вынести подсчёт уроков в отдельный CTE
|
||||||
|
- Учитывать просмотры по user_id + course_id
|
||||||
|
- Использовать NULLIF и ROUND
|
||||||
|
=============================================================== */
|
||||||
|
|
||||||
|
CREATE INDEX idx_user_course_lesson
|
||||||
|
ON core.fact_lesson_views (user_id, course_id, lesson_id);
|
||||||
|
|
||||||
|
CREATE INDEX idx_course_lesson
|
||||||
|
ON core.dim_lesson (course_id, lesson_id);
|
||||||
|
|
||||||
|
CREATE MATERIALIZED VIEW dm.course_completion_rate AS
|
||||||
|
WITH viewed_stats AS (
|
||||||
|
SELECT
|
||||||
|
user_id,
|
||||||
|
course_id,
|
||||||
|
COUNT(DISTINCT lesson_id) AS lessons_viewed
|
||||||
|
FROM core.fact_lesson_views
|
||||||
|
GROUP BY user_id, course_id
|
||||||
|
),
|
||||||
|
course_lessons AS (
|
||||||
|
SELECT
|
||||||
|
course_id,
|
||||||
|
COUNT(DISTINCT lesson_id) AS total_lessons
|
||||||
|
FROM core.dim_lesson
|
||||||
|
GROUP BY course_id
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
du.user_id,
|
||||||
|
du.name AS user_name,
|
||||||
|
c.course_id,
|
||||||
|
c.title AS course_title,
|
||||||
|
cl.total_lessons,
|
||||||
|
COALESCE(vs.lessons_viewed, 0) AS lessons_viewed,
|
||||||
|
ROUND(
|
||||||
|
COALESCE(vs.lessons_viewed, 0)::numeric
|
||||||
|
/ NULLIF(cl.total_lessons, 0),
|
||||||
|
2
|
||||||
|
) AS completion_rate
|
||||||
|
FROM core.enrollments e
|
||||||
|
JOIN core.dim_user du ON du.user_id = e.user_id
|
||||||
|
JOIN core.dim_course c ON c.course_id = e.course_id
|
||||||
|
JOIN course_lessons cl ON cl.course_id = c.course_id
|
||||||
|
LEFT JOIN viewed_stats vs
|
||||||
|
ON vs.user_id = e.user_id
|
||||||
|
AND vs.course_id = e.course_id
|
||||||
|
DISTRIBUTED REPLICATED;
|
||||||
|
|
||||||
|
|
||||||
|
-- ===============================================================
|
||||||
|
-- TEST SELECTS
|
||||||
|
-- ===============================================================
|
||||||
|
|
||||||
|
SELECT * FROM dm.lesson_popularity_summary LIMIT 10;
|
||||||
|
SELECT * FROM dm.inactive_users_summary LIMIT 10;
|
||||||
|
SELECT * FROM dm.course_completion_rate LIMIT 10;
|
||||||
Reference in New Issue
Block a user