pet-project
This commit is contained in:
286
gp_pet_project.sql
Normal file
286
gp_pet_project.sql
Normal file
@@ -0,0 +1,286 @@
|
||||
-- ===============================================================
|
||||
-- SCHEMAS
|
||||
-- ===============================================================
|
||||
|
||||
CREATE SCHEMA raw;
|
||||
CREATE SCHEMA core;
|
||||
CREATE SCHEMA dm;
|
||||
|
||||
|
||||
-- ===============================================================
|
||||
-- DIMENSIONS
|
||||
-- ===============================================================
|
||||
|
||||
-- dim_user
|
||||
CREATE TABLE core.dim_user (
|
||||
user_id INT PRIMARY KEY,
|
||||
name TEXT,
|
||||
age INT,
|
||||
email TEXT,
|
||||
registration_date DATE
|
||||
)
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
INSERT INTO core.dim_user (user_id, name, age, email, registration_date)
|
||||
SELECT DISTINCT
|
||||
user_id::INT,
|
||||
name::TEXT,
|
||||
age::INT,
|
||||
email::TEXT,
|
||||
registration_date::DATE
|
||||
FROM raw.users;
|
||||
|
||||
|
||||
-- dim_course
|
||||
CREATE TABLE core.dim_course (
|
||||
course_id INT PRIMARY KEY,
|
||||
title TEXT,
|
||||
category TEXT
|
||||
)
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
INSERT INTO core.dim_course (course_id, title, category)
|
||||
SELECT DISTINCT
|
||||
id::INT,
|
||||
title::TEXT,
|
||||
category::TEXT
|
||||
FROM raw.courses;
|
||||
|
||||
|
||||
-- dim_lesson
|
||||
CREATE TABLE core.dim_lesson (
|
||||
lesson_id INT PRIMARY KEY,
|
||||
title TEXT,
|
||||
duration_min INT,
|
||||
course_id INT REFERENCES core.dim_course(course_id)
|
||||
)
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
INSERT INTO core.dim_lesson (lesson_id, title, duration_min, course_id)
|
||||
SELECT DISTINCT
|
||||
id::INT,
|
||||
title::TEXT,
|
||||
duration_min::INT,
|
||||
course_id::INT
|
||||
FROM raw.lessons;
|
||||
|
||||
|
||||
-- dim_date (optional)
|
||||
CREATE TABLE core.dim_date (
|
||||
date DATE PRIMARY KEY,
|
||||
year INT,
|
||||
month INT,
|
||||
day INT,
|
||||
weekday TEXT
|
||||
)
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
|
||||
-- ===============================================================
|
||||
-- FACT TABLES
|
||||
-- ===============================================================
|
||||
|
||||
-- fact_lesson_views
|
||||
CREATE TABLE core.fact_lesson_views (
|
||||
user_id INT REFERENCES core.dim_user(user_id),
|
||||
lesson_id INT REFERENCES core.dim_lesson(lesson_id),
|
||||
course_id INT REFERENCES core.dim_course(course_id),
|
||||
viewed_at TIMESTAMP
|
||||
)
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
INSERT INTO core.fact_lesson_views (user_id, lesson_id, course_id, viewed_at)
|
||||
SELECT DISTINCT
|
||||
lv.user_id::INT,
|
||||
lv.lesson_id::INT,
|
||||
l.course_id::INT,
|
||||
lv.viewed_at::TIMESTAMP
|
||||
FROM raw.lesson_views lv
|
||||
JOIN raw.lessons l ON l.id = lv.lesson_id;
|
||||
|
||||
|
||||
-- enrollments
|
||||
CREATE TABLE core.enrollments (
|
||||
user_id INT REFERENCES core.dim_user(user_id),
|
||||
course_id INT REFERENCES core.dim_course(course_id),
|
||||
enrolled_at TIMESTAMP
|
||||
)
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
INSERT INTO core.enrollments (user_id, course_id, enrolled_at)
|
||||
SELECT DISTINCT
|
||||
user_id::INT,
|
||||
course_id::INT,
|
||||
enrolled_at::TIMESTAMP
|
||||
FROM raw.enrollments;
|
||||
|
||||
|
||||
-- ===============================================================
|
||||
-- VITRINA 7 — Lesson Popularity
|
||||
-- ===============================================================
|
||||
|
||||
/* ===============================================================
|
||||
ЗАДАНИЕ 7: Витрина dm.lesson_popularity_summary
|
||||
|
||||
Цель:
|
||||
Показать популярность каждого урока.
|
||||
|
||||
Поля витрины:
|
||||
- lesson_id — идентификатор урока
|
||||
- lesson_title — название урока
|
||||
- course_id — идентификатор курса
|
||||
- course_title — название курса
|
||||
- total_views — общее число просмотров урока
|
||||
- unique_users — количество уникальных пользователей
|
||||
- first_view — дата первого просмотра
|
||||
- last_view — дата последнего просмотра
|
||||
|
||||
Условия выполнения:
|
||||
- JOIN fact_lesson_views, dim_lesson, dim_course
|
||||
- Группировка по lesson_id, lesson_title, course_id, course_title
|
||||
- Уникальные пользователи: COUNT(DISTINCT user_id)
|
||||
=============================================================== */
|
||||
|
||||
CREATE MATERIALIZED VIEW dm.lesson_popularity_summary AS
|
||||
SELECT
|
||||
flv.lesson_id,
|
||||
dl.title AS lesson_title,
|
||||
flv.course_id,
|
||||
dc.title AS course_title,
|
||||
COUNT(*) AS total_views,
|
||||
COUNT(DISTINCT flv.user_id) AS unique_users,
|
||||
MIN(flv.viewed_at) AS first_view,
|
||||
MAX(flv.viewed_at) AS last_view
|
||||
FROM core.fact_lesson_views flv
|
||||
LEFT JOIN core.dim_lesson dl ON dl.lesson_id = flv.lesson_id
|
||||
LEFT JOIN core.dim_course dc ON dc.course_id = flv.course_id
|
||||
GROUP BY flv.lesson_id, dl.title, flv.course_id, dc.title
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
|
||||
-- ===============================================================
|
||||
-- VITRINA — Inactive Users
|
||||
-- ===============================================================
|
||||
|
||||
/* ===============================================================
|
||||
ЗАДАНИЕ: Найти пользователей, которые не просмотрели ни одного урока.
|
||||
|
||||
Цель:
|
||||
Определить невовлечённых пользователей и потенциальный отток.
|
||||
|
||||
Поля витрины:
|
||||
- user_id — идентификатор пользователя
|
||||
- user_name — имя
|
||||
- email — почта
|
||||
- age — возраст
|
||||
- enrollments_count — количество курсов
|
||||
- registration_date — дата регистрации
|
||||
|
||||
Условия выполнения:
|
||||
- Пользователи, отсутствующие в fact_lesson_views
|
||||
- LEFT JOIN dim_user + enrollments
|
||||
- Фильтрация через NOT EXISTS
|
||||
=============================================================== */
|
||||
|
||||
CREATE INDEX idx_fact_lesson_views_user
|
||||
ON core.fact_lesson_views (user_id);
|
||||
|
||||
CREATE INDEX idx_enrollments_user_course
|
||||
ON core.enrollments (user_id, course_id);
|
||||
|
||||
CREATE MATERIALIZED VIEW dm.inactive_users_summary AS
|
||||
SELECT
|
||||
du.user_id,
|
||||
du.name AS user_name,
|
||||
du.email,
|
||||
du.age,
|
||||
COUNT(e.course_id) AS enrollments_count,
|
||||
du.registration_date
|
||||
FROM core.dim_user du
|
||||
LEFT JOIN core.enrollments e ON e.user_id = du.user_id
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM core.fact_lesson_views fv
|
||||
WHERE fv.user_id = du.user_id
|
||||
)
|
||||
GROUP BY du.user_id
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
|
||||
-- ===============================================================
|
||||
-- VITRINA 9 — Course Completion
|
||||
-- ===============================================================
|
||||
|
||||
/* ===============================================================
|
||||
ЗАДАНИЕ 9: Витрина dm.course_completion_rate
|
||||
|
||||
Цель:
|
||||
Анализировать, насколько пользователи проходят курсы до конца.
|
||||
|
||||
Поля витрины:
|
||||
- user_id — идентификатор пользователя
|
||||
- user_name — имя пользователя
|
||||
- course_id — идентификатор курса
|
||||
- course_title — название курса
|
||||
- total_lessons — количество уроков в курсе
|
||||
- lessons_viewed — число просмотренных уроков
|
||||
- completion_rate — lessons_viewed / total_lessons
|
||||
|
||||
Условия выполнения:
|
||||
- JOIN между dim_user, enrollments, dim_lesson, fact_lesson_views
|
||||
- Вынести подсчёт уроков в отдельный CTE
|
||||
- Учитывать просмотры по user_id + course_id
|
||||
- Использовать NULLIF и ROUND
|
||||
=============================================================== */
|
||||
|
||||
CREATE INDEX idx_user_course_lesson
|
||||
ON core.fact_lesson_views (user_id, course_id, lesson_id);
|
||||
|
||||
CREATE INDEX idx_course_lesson
|
||||
ON core.dim_lesson (course_id, lesson_id);
|
||||
|
||||
CREATE MATERIALIZED VIEW dm.course_completion_rate AS
|
||||
WITH viewed_stats AS (
|
||||
SELECT
|
||||
user_id,
|
||||
course_id,
|
||||
COUNT(DISTINCT lesson_id) AS lessons_viewed
|
||||
FROM core.fact_lesson_views
|
||||
GROUP BY user_id, course_id
|
||||
),
|
||||
course_lessons AS (
|
||||
SELECT
|
||||
course_id,
|
||||
COUNT(DISTINCT lesson_id) AS total_lessons
|
||||
FROM core.dim_lesson
|
||||
GROUP BY course_id
|
||||
)
|
||||
SELECT
|
||||
du.user_id,
|
||||
du.name AS user_name,
|
||||
c.course_id,
|
||||
c.title AS course_title,
|
||||
cl.total_lessons,
|
||||
COALESCE(vs.lessons_viewed, 0) AS lessons_viewed,
|
||||
ROUND(
|
||||
COALESCE(vs.lessons_viewed, 0)::numeric
|
||||
/ NULLIF(cl.total_lessons, 0),
|
||||
2
|
||||
) AS completion_rate
|
||||
FROM core.enrollments e
|
||||
JOIN core.dim_user du ON du.user_id = e.user_id
|
||||
JOIN core.dim_course c ON c.course_id = e.course_id
|
||||
JOIN course_lessons cl ON cl.course_id = c.course_id
|
||||
LEFT JOIN viewed_stats vs
|
||||
ON vs.user_id = e.user_id
|
||||
AND vs.course_id = e.course_id
|
||||
DISTRIBUTED REPLICATED;
|
||||
|
||||
|
||||
-- ===============================================================
|
||||
-- TEST SELECTS
|
||||
-- ===============================================================
|
||||
|
||||
SELECT * FROM dm.lesson_popularity_summary LIMIT 10;
|
||||
SELECT * FROM dm.inactive_users_summary LIMIT 10;
|
||||
SELECT * FROM dm.course_completion_rate LIMIT 10;
|
||||
Reference in New Issue
Block a user