-- =============================================================== -- SCHEMAS -- =============================================================== CREATE SCHEMA raw; CREATE SCHEMA core; CREATE SCHEMA dm; -- =============================================================== -- DIMENSIONS -- =============================================================== -- dim_user CREATE TABLE core.dim_user ( user_id INT PRIMARY KEY, name TEXT, age INT, email TEXT, registration_date DATE ) DISTRIBUTED REPLICATED; INSERT INTO core.dim_user (user_id, name, age, email, registration_date) SELECT DISTINCT user_id::INT, name::TEXT, age::INT, email::TEXT, registration_date::DATE FROM raw.users; -- dim_course CREATE TABLE core.dim_course ( course_id INT PRIMARY KEY, title TEXT, category TEXT ) DISTRIBUTED REPLICATED; INSERT INTO core.dim_course (course_id, title, category) SELECT DISTINCT id::INT, title::TEXT, category::TEXT FROM raw.courses; -- dim_lesson CREATE TABLE core.dim_lesson ( lesson_id INT PRIMARY KEY, title TEXT, duration_min INT, course_id INT REFERENCES core.dim_course(course_id) ) DISTRIBUTED REPLICATED; INSERT INTO core.dim_lesson (lesson_id, title, duration_min, course_id) SELECT DISTINCT id::INT, title::TEXT, duration_min::INT, course_id::INT FROM raw.lessons; -- dim_date (optional) CREATE TABLE core.dim_date ( date DATE PRIMARY KEY, year INT, month INT, day INT, weekday TEXT ) DISTRIBUTED REPLICATED; -- =============================================================== -- FACT TABLES -- =============================================================== -- fact_lesson_views CREATE TABLE core.fact_lesson_views ( user_id INT REFERENCES core.dim_user(user_id), lesson_id INT REFERENCES core.dim_lesson(lesson_id), course_id INT REFERENCES core.dim_course(course_id), viewed_at TIMESTAMP ) DISTRIBUTED REPLICATED; INSERT INTO core.fact_lesson_views (user_id, lesson_id, course_id, viewed_at) SELECT DISTINCT lv.user_id::INT, lv.lesson_id::INT, l.course_id::INT, lv.viewed_at::TIMESTAMP FROM raw.lesson_views lv JOIN raw.lessons l ON l.id = lv.lesson_id; -- enrollments CREATE TABLE core.enrollments ( user_id INT REFERENCES core.dim_user(user_id), course_id INT REFERENCES core.dim_course(course_id), enrolled_at TIMESTAMP ) DISTRIBUTED REPLICATED; INSERT INTO core.enrollments (user_id, course_id, enrolled_at) SELECT DISTINCT user_id::INT, course_id::INT, enrolled_at::TIMESTAMP FROM raw.enrollments; -- =============================================================== -- VITRINA 7 — Lesson Popularity -- =============================================================== /* =============================================================== ЗАДАНИЕ 7: Витрина dm.lesson_popularity_summary Цель: Показать популярность каждого урока. Поля витрины: - lesson_id — идентификатор урока - lesson_title — название урока - course_id — идентификатор курса - course_title — название курса - total_views — общее число просмотров урока - unique_users — количество уникальных пользователей - first_view — дата первого просмотра - last_view — дата последнего просмотра Условия выполнения: - JOIN fact_lesson_views, dim_lesson, dim_course - Группировка по lesson_id, lesson_title, course_id, course_title - Уникальные пользователи: COUNT(DISTINCT user_id) =============================================================== */ CREATE MATERIALIZED VIEW dm.lesson_popularity_summary AS SELECT flv.lesson_id, dl.title AS lesson_title, flv.course_id, dc.title AS course_title, COUNT(*) AS total_views, COUNT(DISTINCT flv.user_id) AS unique_users, MIN(flv.viewed_at) AS first_view, MAX(flv.viewed_at) AS last_view FROM core.fact_lesson_views flv LEFT JOIN core.dim_lesson dl ON dl.lesson_id = flv.lesson_id LEFT JOIN core.dim_course dc ON dc.course_id = flv.course_id GROUP BY flv.lesson_id, dl.title, flv.course_id, dc.title DISTRIBUTED REPLICATED; -- =============================================================== -- VITRINA — Inactive Users -- =============================================================== /* =============================================================== ЗАДАНИЕ: Найти пользователей, которые не просмотрели ни одного урока. Цель: Определить невовлечённых пользователей и потенциальный отток. Поля витрины: - user_id — идентификатор пользователя - user_name — имя - email — почта - age — возраст - enrollments_count — количество курсов - registration_date — дата регистрации Условия выполнения: - Пользователи, отсутствующие в fact_lesson_views - LEFT JOIN dim_user + enrollments - Фильтрация через NOT EXISTS =============================================================== */ CREATE INDEX idx_fact_lesson_views_user ON core.fact_lesson_views (user_id); CREATE INDEX idx_enrollments_user_course ON core.enrollments (user_id, course_id); CREATE MATERIALIZED VIEW dm.inactive_users_summary AS SELECT du.user_id, du.name AS user_name, du.email, du.age, COUNT(e.course_id) AS enrollments_count, du.registration_date FROM core.dim_user du LEFT JOIN core.enrollments e ON e.user_id = du.user_id WHERE NOT EXISTS ( SELECT 1 FROM core.fact_lesson_views fv WHERE fv.user_id = du.user_id ) GROUP BY du.user_id DISTRIBUTED REPLICATED; -- =============================================================== -- VITRINA 9 — Course Completion -- =============================================================== /* =============================================================== ЗАДАНИЕ 9: Витрина dm.course_completion_rate Цель: Анализировать, насколько пользователи проходят курсы до конца. Поля витрины: - user_id — идентификатор пользователя - user_name — имя пользователя - course_id — идентификатор курса - course_title — название курса - total_lessons — количество уроков в курсе - lessons_viewed — число просмотренных уроков - completion_rate — lessons_viewed / total_lessons Условия выполнения: - JOIN между dim_user, enrollments, dim_lesson, fact_lesson_views - Вынести подсчёт уроков в отдельный CTE - Учитывать просмотры по user_id + course_id - Использовать NULLIF и ROUND =============================================================== */ CREATE INDEX idx_user_course_lesson ON core.fact_lesson_views (user_id, course_id, lesson_id); CREATE INDEX idx_course_lesson ON core.dim_lesson (course_id, lesson_id); CREATE MATERIALIZED VIEW dm.course_completion_rate AS WITH viewed_stats AS ( SELECT user_id, course_id, COUNT(DISTINCT lesson_id) AS lessons_viewed FROM core.fact_lesson_views GROUP BY user_id, course_id ), course_lessons AS ( SELECT course_id, COUNT(DISTINCT lesson_id) AS total_lessons FROM core.dim_lesson GROUP BY course_id ) SELECT du.user_id, du.name AS user_name, c.course_id, c.title AS course_title, cl.total_lessons, COALESCE(vs.lessons_viewed, 0) AS lessons_viewed, ROUND( COALESCE(vs.lessons_viewed, 0)::numeric / NULLIF(cl.total_lessons, 0), 2 ) AS completion_rate FROM core.enrollments e JOIN core.dim_user du ON du.user_id = e.user_id JOIN core.dim_course c ON c.course_id = e.course_id JOIN course_lessons cl ON cl.course_id = c.course_id LEFT JOIN viewed_stats vs ON vs.user_id = e.user_id AND vs.course_id = e.course_id DISTRIBUTED REPLICATED; -- =============================================================== -- TEST SELECTS -- =============================================================== SELECT * FROM dm.lesson_popularity_summary LIMIT 10; SELECT * FROM dm.inactive_users_summary LIMIT 10; SELECT * FROM dm.course_completion_rate LIMIT 10;