From 6aba8b4580106bd83f369b9635df4e4340f3bdd8 Mon Sep 17 00:00:00 2001 From: "nazarenko.ae" Date: Wed, 10 Jun 2026 00:14:21 +0700 Subject: [PATCH] feat: add pet-project and gitignore --- .gitignore | 3 + pet-project/create_dm_ch.py | 132 ++++++++++++++ pet-project/create_dm_gp.py | 163 ++++++++++++++++++ pet-project/create_dm_pg.py | 163 ++++++++++++++++++ pet-project/src/ch/core_dim_course.csv | 6 + pet-project/src/ch/core_dim_lesson.csv | 19 ++ pet-project/src/ch/core_dim_user.csv | 21 +++ pet-project/src/ch/core_fact_enrollments.csv | 46 +++++ pet-project/src/ch/core_fact_lesson_views.csv | 58 +++++++ pet-project/src/gp/courses.csv | 6 + pet-project/src/gp/enrollments.csv | 46 +++++ pet-project/src/gp/lesson_views.csv | 58 +++++++ pet-project/src/gp/lessons.csv | 19 ++ pet-project/src/gp/users.csv | 21 +++ pet-project/src/pg/courses.csv | 6 + pet-project/src/pg/enrollments.csv | 46 +++++ pet-project/src/pg/lesson_views.csv | 58 +++++++ pet-project/src/pg/lessons.csv | 19 ++ pet-project/src/pg/users.csv | 21 +++ 19 files changed, 911 insertions(+) create mode 100644 .gitignore create mode 100644 pet-project/create_dm_ch.py create mode 100644 pet-project/create_dm_gp.py create mode 100644 pet-project/create_dm_pg.py create mode 100644 pet-project/src/ch/core_dim_course.csv create mode 100644 pet-project/src/ch/core_dim_lesson.csv create mode 100644 pet-project/src/ch/core_dim_user.csv create mode 100644 pet-project/src/ch/core_fact_enrollments.csv create mode 100644 pet-project/src/ch/core_fact_lesson_views.csv create mode 100644 pet-project/src/gp/courses.csv create mode 100644 pet-project/src/gp/enrollments.csv create mode 100644 pet-project/src/gp/lesson_views.csv create mode 100644 pet-project/src/gp/lessons.csv create mode 100644 pet-project/src/gp/users.csv create mode 100644 pet-project/src/pg/courses.csv create mode 100644 pet-project/src/pg/enrollments.csv create mode 100644 pet-project/src/pg/lesson_views.csv create mode 100644 
pet-project/src/pg/lessons.csv
 create mode 100644 pet-project/src/pg/users.csv
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..37b40df
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+.venv
+.vscode
diff --git a/pet-project/create_dm_ch.py b/pet-project/create_dm_ch.py
new file mode 100644
index 0000000..df9bfdf
--- /dev/null
+++ b/pet-project/create_dm_ch.py
@@ -0,0 +1,157 @@
+"""Build data marts from the CSV extracts in ``src/ch``.
+
+Loads the dimension and fact CSV files, computes three marts and writes each
+one as a headered CSV dataset under ``marts/ch``. All paths are relative to
+the current working directory.
+"""
+
+import os
+from pathlib import Path
+
+from pyspark.sql import DataFrame, SparkSession
+from pyspark.sql import functions as F
+
+spark = SparkSession.builder.appName("create_dm_ch").master("local[*]").getOrCreate()
+
+
+def read_csv(csv_filename: str, path: Path = Path("src/ch")) -> DataFrame:
+    """Read ``path / csv_filename`` as a headered CSV with an inferred schema."""
+    return spark.read.csv(
+        path=str(path / csv_filename),
+        header=True,
+        inferSchema=True,
+    )
+
+
+core_dim_user = read_csv("core_dim_user.csv")
+core_dim_course = read_csv("core_dim_course.csv")
+core_dim_lesson = read_csv("core_dim_lesson.csv")
+core_fact_enrollments = read_csv("core_fact_enrollments.csv")
+core_fact_lesson_views = read_csv("core_fact_lesson_views.csv")
+
+
+def lesson_popularity_summary() -> DataFrame:
+    """Return one row per lesson with its view statistics.
+
+    Lessons are inner-joined to their course and left-joined to views, so a
+    lesson without views is kept with zero counts and null view timestamps.
+    """
+    result = (
+        core_dim_lesson
+        .alias("l")
+        .join(
+            core_dim_course.alias("c"),
+            F.col("c.course_id") == F.col("l.course_id"),
+            how="inner",
+        )
+        .join(
+            core_fact_lesson_views.alias("lv"),
+            F.col("l.lesson_id") == F.col("lv.lesson_id"),
+            how="left",
+        )
+        .groupBy(["l.lesson_id", "l.title", "c.title", "l.course_id"])
+        .agg(
+            F.count("lv.lesson_id").alias("total_views"),
+            F.count_distinct("lv.user_id").alias("unique_users"),
+            F.min("lv.viewed_at").alias("first_view"),
+            F.max("lv.viewed_at").alias("last_view"),
+        )
+        .select(
+            "lesson_id",
+            F.col("l.title").alias("lesson_title"),
+            "course_id",
+            F.col("c.title").alias("course_title"),
+            "total_views",
+            "unique_users",
+            "first_view",
+            "last_view",
+        )
+    )
+
+    return result
+
+
+def inactive_users_summary() -> DataFrame:
+    """Return users without any lesson view, with their enrollment counts.
+
+    ``registered_courses_count`` is the number of distinct courses the user is
+    enrolled in, or 0 when the user has no enrollments.
+    """
+    active_users = core_fact_lesson_views.select("user_id").distinct()
+    inactive_users = core_dim_user.join(active_users, on="user_id", how="left_anti")
+
+    registered_courses_count = core_fact_enrollments.groupBy("user_id").agg(
+        F.count_distinct("course_id").alias("registered_courses_count")
+    )
+
+    result = inactive_users.join(
+        registered_courses_count, on="user_id", how="left"
+    ).select(
+        "user_id",
+        "name",
+        "email",
+        "age",
+        F.coalesce("registered_courses_count", F.lit(0)).alias(
+            "registered_courses_count"
+        ),
+        "registration_date",
+    )
+
+    return result
+
+
+def course_completion_rate() -> DataFrame:
+    """Return, per user and course, the share of the course's lessons viewed.
+
+    ``completion_rate`` is the number of distinct viewed lessons divided by
+    the number of lessons in the course, rounded to 2 decimals.
+    """
+    course_stats = core_dim_lesson.groupBy("course_id").agg(
+        F.count("lesson_id").alias("lessons_in_course")
+    )
+
+    view_stats = core_fact_lesson_views.groupBy("user_id", "course_id").agg(
+        F.count_distinct("lesson_id").alias("lessons_viewed")
+    )
+
+    result = (
+        core_dim_user
+        .alias("u")
+        .join(
+            view_stats.alias("vs"),
+            F.col("u.user_id") == F.col("vs.user_id"),
+            how="left",
+        )
+        # NOTE(review): course_id is null for users without views, so this
+        # inner join drops them and the left join above keeps no extra rows.
+        .join(core_dim_course.alias("c"), on="course_id", how="inner")
+        .join(course_stats.alias("cs"), on="course_id", how="left")
+        .select(
+            "u.user_id",
+            F.col("u.name").alias("user_name"),
+            "c.course_id",
+            F.col("c.title").alias("course_title"),
+            "lessons_in_course",
+            "lessons_viewed",
+            F.round(
+                F.coalesce("lessons_viewed", F.lit(0)) / F.col("lessons_in_course"),
+                2,
+            ).alias("completion_rate"),
+        )
+    )
+
+    return result
+
+
+output_dir = Path("marts/ch")
+os.makedirs(output_dir, exist_ok=True)
+
+lesson_popularity_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "lesson_popularity_summary")
+)
+inactive_users_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "inactive_users_summary")
+)
+course_completion_rate().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "course_completion_rate")
+)
diff --git a/pet-project/create_dm_gp.py b/pet-project/create_dm_gp.py
new file mode 100644
index 0000000..3ac5e47
--- /dev/null
+++ b/pet-project/create_dm_gp.py
@@ -0,0 +1,183 @@
+"""Build data marts from the CSV extracts in ``src/gp``.
+
+Loads the source CSV files, computes three marts and writes each one as a
+headered CSV dataset under ``marts/gp``. All paths are relative to the
+current working directory.
+"""
+
+import os
+from pathlib import Path
+
+import pyspark.sql.functions as F
+from pyspark.sql import DataFrame, SparkSession
+
+spark = SparkSession.builder.appName("create_dm_gp").master("local[*]").getOrCreate()
+
+
+def read_csv(csv_filename: str, path: Path = Path("src/gp")) -> DataFrame:
+    """Read ``path / csv_filename`` as a headered CSV with an inferred schema."""
+    return spark.read.csv(
+        path=str(path / csv_filename),
+        header=True,
+        inferSchema=True,
+    )
+
+
+users = read_csv("users.csv").withColumnRenamed("id", "user_id")
+courses = read_csv("courses.csv").withColumnRenamed("id", "course_id")
+lessons = read_csv("lessons.csv").withColumnRenamed("id", "lesson_id")
+enrollments = read_csv("enrollments.csv")
+lesson_views = read_csv("lesson_views.csv")
+
+
+def user_activity_summary() -> DataFrame:
+    """Return per-user enrollment and lesson-view statistics.
+
+    Only users with at least one enrollment are kept. ``engagement_rate`` is
+    the distinct viewed lessons as a percentage of the lessons in the user's
+    enrolled courses, rounded to 2 decimals with a trailing ``%``.
+    """
+    enrollment_stats = enrollments.groupBy("user_id").agg(
+        F.count("course_id").alias("enrollment_count")
+    )
+
+    view_stats = lesson_views.groupBy("user_id").agg(
+        F.count_distinct("lesson_id").alias("viewed_lessons_count"),
+        F.min("viewed_at").alias("first_view"),
+        F.max("viewed_at").alias("last_view"),
+    )
+
+    possible_lessons = (
+        enrollments
+        .join(lessons, on="course_id", how="inner")
+        .groupBy("user_id")
+        .agg(F.count_distinct("lesson_id").alias("possible_lessons"))
+    )
+
+    result = (
+        users
+        .join(enrollment_stats, on="user_id", how="inner")
+        .join(view_stats, on="user_id", how="left")
+        .join(possible_lessons, on="user_id", how="left")
+    )
+
+    result = result.withColumn(
+        "engagement_rate",
+        F.concat(
+            F.round(
+                F.coalesce(F.col("viewed_lessons_count"), F.lit(0))
+                * 100.0
+                / F.col("possible_lessons"),
+                2,
+            ),
+            F.lit("%"),
+        ),
+    )
+
+    result = result.select(
+        "user_id",
+        "name",
+        "enrollment_count",
+        "viewed_lessons_count",
+        "first_view",
+        "last_view",
+        "engagement_rate",
+    )
+
+    return result
+
+
+def course_summary() -> DataFrame:
+    """Return per-course enrollment and view statistics.
+
+    Only courses with at least one enrollment and at least one lesson are
+    kept; ``avg_views_per_user`` is total views divided by enrolled users.
+    """
+    enrollment_stats = enrollments.groupBy("course_id").agg(
+        F.count_distinct("user_id").alias("unique_users")
+    )
+
+    view_stats = (
+        lessons
+        .alias("l")
+        .join(lesson_views.alias("lv"), on="lesson_id", how="left")
+        .groupBy("course_id")
+        .agg(
+            F.count_distinct("l.lesson_id").alias("lessons_count"),
+            F.count("lv.lesson_id").alias("total_views"),
+            F.min("lv.viewed_at").alias("first_view"),
+            F.max("lv.viewed_at").alias("last_view"),
+        )
+    )
+
+    result = courses.join(enrollment_stats, on="course_id", how="inner").join(
+        view_stats, on="course_id", how="inner"
+    )
+
+    result = result.withColumn(
+        "avg_views_per_user",
+        F.round(
+            F.coalesce(F.col("total_views"), F.lit(0)) / F.col("unique_users"),
+            2,
+        ),
+    ).select(
+        "course_id",
+        "title",
+        "unique_users",
+        "lessons_count",
+        "total_views",
+        "avg_views_per_user",
+        "first_view",
+        "last_view",
+    )
+
+    return result
+
+
+def platform_summary() -> DataFrame:
+    """Return a single-row DataFrame of platform-wide totals and averages."""
+    total_users = users.agg(F.count("*").alias("total_users"))
+    total_courses = courses.agg(F.count("*").alias("total_courses"))
+    total_lessons = lessons.agg(F.count("*").alias("total_lessons"))
+    users_with_views = lesson_views.agg(
+        F.count_distinct("user_id").alias("users_with_views")
+    )
+
+    lessons_per_course = lessons.groupBy("course_id").agg(
+        F.count_distinct("lesson_id").alias("lessons_count")
+    )
+    avg_lessons_per_course = lessons_per_course.agg(
+        F.round(F.avg("lessons_count"), 2).alias("avg_lessons_per_course")
+    )
+
+    views_per_lesson = lesson_views.groupBy("lesson_id").agg(
+        F.count("*").alias("total_views")
+    )
+    avg_views_per_lesson = views_per_lesson.agg(
+        F.round(F.avg("total_views"), 2).alias("avg_views_per_lesson")
+    )
+
+    result = (
+        total_users
+        .crossJoin(total_courses)
+        .crossJoin(total_lessons)
+        .crossJoin(users_with_views)
+        .crossJoin(avg_lessons_per_course)
+        .crossJoin(avg_views_per_lesson)
+    )
+
+    return result
+
+
+output_dir = Path("marts/gp")
+os.makedirs(output_dir, exist_ok=True)
+
+user_activity_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "user_activity_summary")
+)
+course_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "course_summary")
+)
+platform_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "platform_summary")
+)
diff --git a/pet-project/create_dm_pg.py b/pet-project/create_dm_pg.py
new file mode 100644
index 0000000..4222b7c
--- /dev/null
+++ b/pet-project/create_dm_pg.py
@@ -0,0 +1,183 @@
+"""Build data marts from the CSV extracts in ``src/pg``.
+
+Loads the source CSV files, computes three marts and writes each one as a
+headered CSV dataset under ``marts/pg``. All paths are relative to the
+current working directory.
+"""
+
+import os
+from pathlib import Path
+
+import pyspark.sql.functions as F
+from pyspark.sql import DataFrame, SparkSession
+
+spark = SparkSession.builder.appName("create_dm_pg").master("local[*]").getOrCreate()
+
+
+def read_csv(csv_filename: str, path: Path = Path("src/pg")) -> DataFrame:
+    """Read ``path / csv_filename`` as a headered CSV with an inferred schema."""
+    return spark.read.csv(
+        path=str(path / csv_filename),
+        header=True,
+        inferSchema=True,
+    )
+
+
+users = read_csv("users.csv").withColumnRenamed("id", "user_id")
+courses = read_csv("courses.csv").withColumnRenamed("id", "course_id")
+lessons = read_csv("lessons.csv").withColumnRenamed("id", "lesson_id")
+enrollments = read_csv("enrollments.csv")
+lesson_views = read_csv("lesson_views.csv")
+
+
+def user_activity_summary() -> DataFrame:
+    """Return per-user enrollment and lesson-view statistics.
+
+    Only users with at least one enrollment are kept. ``engagement_rate`` is
+    the distinct viewed lessons as a percentage of the lessons in the user's
+    enrolled courses, rounded to 2 decimals with a trailing ``%``.
+    """
+    enrollment_stats = enrollments.groupBy("user_id").agg(
+        F.count("course_id").alias("enrollment_count")
+    )
+
+    view_stats = lesson_views.groupBy("user_id").agg(
+        F.count_distinct("lesson_id").alias("viewed_lessons_count"),
+        F.min("viewed_at").alias("first_view"),
+        F.max("viewed_at").alias("last_view"),
+    )
+
+    possible_lessons = (
+        enrollments
+        .join(lessons, on="course_id", how="inner")
+        .groupBy("user_id")
+        .agg(F.count_distinct("lesson_id").alias("possible_lessons"))
+    )
+
+    result = (
+        users
+        .join(enrollment_stats, on="user_id", how="inner")
+        .join(view_stats, on="user_id", how="left")
+        .join(possible_lessons, on="user_id", how="left")
+    )
+
+    result = result.withColumn(
+        "engagement_rate",
+        F.concat(
+            F.round(
+                F.coalesce(F.col("viewed_lessons_count"), F.lit(0))
+                * 100.0
+                / F.col("possible_lessons"),
+                2,
+            ),
+            F.lit("%"),
+        ),
+    )
+
+    result = result.select(
+        "user_id",
+        "name",
+        "enrollment_count",
+        "viewed_lessons_count",
+        "first_view",
+        "last_view",
+        "engagement_rate",
+    )
+
+    return result
+
+
+def course_summary() -> DataFrame:
+    """Return per-course enrollment and view statistics.
+
+    Only courses with at least one enrollment and at least one lesson are
+    kept; ``avg_views_per_user`` is total views divided by enrolled users.
+    """
+    enrollment_stats = enrollments.groupBy("course_id").agg(
+        F.count_distinct("user_id").alias("unique_users")
+    )
+
+    view_stats = (
+        lessons
+        .alias("l")
+        .join(lesson_views.alias("lv"), on="lesson_id", how="left")
+        .groupBy("course_id")
+        .agg(
+            F.count_distinct("l.lesson_id").alias("lessons_count"),
+            F.count("lv.lesson_id").alias("total_views"),
+            F.min("lv.viewed_at").alias("first_view"),
+            F.max("lv.viewed_at").alias("last_view"),
+        )
+    )
+
+    result = courses.join(enrollment_stats, on="course_id", how="inner").join(
+        view_stats, on="course_id", how="inner"
+    )
+
+    result = result.withColumn(
+        "avg_views_per_user",
+        F.round(
+            F.coalesce(F.col("total_views"), F.lit(0)) / F.col("unique_users"),
+            2,
+        ),
+    ).select(
+        "course_id",
+        "title",
+        "unique_users",
+        "lessons_count",
+        "total_views",
+        "avg_views_per_user",
+        "first_view",
+        "last_view",
+    )
+
+    return result
+
+
+def platform_summary() -> DataFrame:
+    """Return a single-row DataFrame of platform-wide totals and averages."""
+    total_users = users.agg(F.count("*").alias("total_users"))
+    total_courses = courses.agg(F.count("*").alias("total_courses"))
+    total_lessons = lessons.agg(F.count("*").alias("total_lessons"))
+    users_with_views = lesson_views.agg(
+        F.count_distinct("user_id").alias("users_with_views")
+    )
+
+    lessons_per_course = lessons.groupBy("course_id").agg(
+        F.count_distinct("lesson_id").alias("lessons_count")
+    )
+    avg_lessons_per_course = lessons_per_course.agg(
+        F.round(F.avg("lessons_count"), 2).alias("avg_lessons_per_course")
+    )
+
+    views_per_lesson = lesson_views.groupBy("lesson_id").agg(
+        F.count("*").alias("total_views")
+    )
+    avg_views_per_lesson = views_per_lesson.agg(
+        F.round(F.avg("total_views"), 2).alias("avg_views_per_lesson")
+    )
+
+    result = (
+        total_users
+        .crossJoin(total_courses)
+        .crossJoin(total_lessons)
+        .crossJoin(users_with_views)
+        .crossJoin(avg_lessons_per_course)
+        .crossJoin(avg_views_per_lesson)
+    )
+
+    return result
+
+
+output_dir = Path("marts/pg")
+os.makedirs(output_dir, exist_ok=True)
+
+user_activity_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "user_activity_summary")
+)
+course_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "course_summary")
+)
+platform_summary().write.mode("overwrite").option("header", "true").csv(
+    str(output_dir / "platform_summary")
+)
diff --git a/pet-project/src/ch/core_dim_course.csv b/pet-project/src/ch/core_dim_course.csv
new file mode 100644
index 0000000..21ffd6c
--- /dev/null
+++ b/pet-project/src/ch/core_dim_course.csv
@@ -0,0 +1,6 @@
+course_id,title,category,created_at
+5,Основы машинного обучения,data,2023-03-15
+1,SQL для начинающих,data,2023-01-01
+2,Python для анализа данных,programming,2023-01-15
+3,BI с нуля,business,2023-03-01
+4,Excel для аналитиков,business,2023-02-01
diff --git a/pet-project/src/ch/core_dim_lesson.csv b/pet-project/src/ch/core_dim_lesson.csv
new file mode 100644
index 0000000..a37985b
--- /dev/null
+++ b/pet-project/src/ch/core_dim_lesson.csv
@@ -0,0 +1,19 @@
+lesson_id,title,duration_min,course_id
+1,SELECT и FROM,10,1
+2,JOIN,15,1
+3,WHERE и фильтрация,12,1
+4,Агрегации и GROUP BY,18,1
+5,Подзапросы,25,1
+6,Pandas основы,20,2
+7,NumPy,15,2
+9,Основы BI,12,3
+10,Метрики и дашборды,18,3
+11,Power BI введение,20,3
+12,Формулы в Excel,15,4
+13,Сводные таблицы,18,4
+14,Графики и диаграммы,14,4
+15,Введение в ML,20,5
+16,Линейная регрессия,30,5
+17,Классификация,28,5
+18,Кластеризация,25,5
+8,Визуализация данных,22,2
diff --git a/pet-project/src/ch/core_dim_user.csv b/pet-project/src/ch/core_dim_user.csv
new file mode 100644
index 0000000..b60d446
--- /dev/null
+++ b/pet-project/src/ch/core_dim_user.csv
@@ -0,0 +1,21 @@
+user_id,name,age,email,registration_date
+20,Tina,25,tina@mail.com,2023-04-18 +1,Alice,25,alice@mail.com,2023-01-10 +2,Bob,30,bob@gmail.com,2023-02-05 +3,Charlie,22,charlie@mail.com,2023-02-20 +4,Diana,28,diana@mail.com,2023-03-01 +5,Ethan,35,ethan@gmail.com,2023-03-10 +6,Frank,29,frank@mail.com,2023-01-05 +7,Grace,26,grace@gmail.com,2023-01-12 +8,Henry,32,henry@mail.com,2023-01-20 +9,Iris,24,iris@gmail.com,2023-02-10 +10,Jack,27,jack@mail.com,2023-04-15 +11,Kelly,23,kelly@gmail.com,2023-04-20 +12,Leo,31,leo@mail.com,2023-01-15 +13,Mia,26,mia@gmail.com,2023-01-18 +14,Nina,28,nina@mail.com,2023-01-08 +15,Oscar,33,oscar@gmail.com,2023-01-10 +16,Paul,29,paul@mail.com,2023-02-15 +17,Quinn,24,quinn@gmail.com,2023-03-20 +18,Rita,27,rita@mail.com,2023-04-01 +19,Sam,30,sam@gmail.com,2023-04-10 diff --git a/pet-project/src/ch/core_fact_enrollments.csv b/pet-project/src/ch/core_fact_enrollments.csv new file mode 100644 index 0000000..f8e6441 --- /dev/null +++ b/pet-project/src/ch/core_fact_enrollments.csv @@ -0,0 +1,46 @@ +user_id,course_id,enrolled_at +1,1,2023-01-15 00:00:00.000 +1,2,2023-02-01 00:00:00.000 +1,4,2023-02-10 00:00:00.000 +2,1,2023-01-20 00:00:00.000 +2,3,2023-03-15 00:00:00.000 +3,1,2023-02-25 00:00:00.000 +3,2,2023-03-05 00:00:00.000 +4,2,2023-03-10 00:00:00.000 +4,3,2023-04-01 00:00:00.000 +5,5,2023-03-20 00:00:00.000 +6,1,2023-01-06 00:00:00.000 +6,2,2023-01-10 00:00:00.000 +6,3,2023-03-08 00:00:00.000 +6,4,2023-02-05 00:00:00.000 +7,1,2023-01-13 00:00:00.000 +7,2,2023-02-01 00:00:00.000 +7,3,2023-03-10 00:00:00.000 +8,1,2023-01-22 00:00:00.000 +8,2,2023-01-22 00:00:00.000 +8,3,2023-01-22 00:00:00.000 +9,1,2023-02-15 00:00:00.000 +9,4,2023-02-12 00:00:00.000 +10,1,2023-04-16 00:00:00.000 +11,2,2023-04-21 00:00:00.000 +12,1,2023-01-16 00:00:00.000 +12,3,2023-03-10 00:00:00.000 +13,2,2023-01-19 00:00:00.000 +13,4,2023-02-20 00:00:00.000 +14,1,2023-01-09 00:00:00.000 +14,2,2023-01-09 00:00:00.000 +14,3,2023-01-09 00:00:00.000 +14,4,2023-02-01 00:00:00.000 +14,5,2023-03-20 00:00:00.000 
+15,1,2023-01-11 00:00:00.000 +15,3,2023-03-05 00:00:00.000 +16,2,2023-02-16 00:00:00.000 +16,4,2023-02-16 00:00:00.000 +17,3,2023-03-22 00:00:00.000 +17,5,2023-03-25 00:00:00.000 +18,1,2023-04-02 00:00:00.000 +18,4,2023-04-05 00:00:00.000 +19,1,2023-04-11 00:00:00.000 +19,2,2023-04-11 00:00:00.000 +20,2,2023-04-19 00:00:00.000 +15,2,2023-02-01 00:00:00.000 diff --git a/pet-project/src/ch/core_fact_lesson_views.csv b/pet-project/src/ch/core_fact_lesson_views.csv new file mode 100644 index 0000000..493efdc --- /dev/null +++ b/pet-project/src/ch/core_fact_lesson_views.csv @@ -0,0 +1,58 @@ +user_id,lesson_id,course_id,viewed_at +13,12,4,2023-02-21 09:00:00.000 +1,1,1,2023-01-16 10:00:00.000 +1,2,1,2023-01-16 10:15:00.000 +1,3,1,2023-01-17 09:00:00.000 +1,4,1,2023-01-17 09:30:00.000 +1,4,1,2023-01-17 11:00:00.000 +1,4,1,2023-01-18 10:00:00.000 +1,6,2,2023-02-02 14:00:00.000 +1,12,4,2023-02-15 10:00:00.000 +1,13,4,2023-02-15 10:30:00.000 +2,1,1,2023-01-21 09:00:00.000 +2,9,3,2023-03-16 10:00:00.000 +2,10,3,2023-03-16 10:30:00.000 +3,1,1,2023-02-26 11:00:00.000 +3,2,1,2023-02-26 11:20:00.000 +3,6,2,2023-03-06 12:00:00.000 +4,6,2,2023-03-12 14:00:00.000 +4,7,2,2023-03-13 09:00:00.000 +4,9,3,2023-04-02 10:00:00.000 +4,10,3,2023-04-03 10:00:00.000 +5,15,5,2023-03-21 10:00:00.000 +5,16,5,2023-03-22 11:00:00.000 +6,1,1,2023-01-07 10:00:00.000 +6,2,1,2023-01-07 10:15:00.000 +6,3,1,2023-01-08 09:00:00.000 +6,4,1,2023-01-08 09:30:00.000 +6,5,1,2023-01-09 10:00:00.000 +6,6,2,2023-01-11 14:00:00.000 +6,7,2,2023-01-12 10:00:00.000 +6,8,2,2023-01-13 11:00:00.000 +6,9,3,2023-03-09 11:00:00.000 +6,12,4,2023-02-06 09:00:00.000 +6,13,4,2023-02-06 09:30:00.000 +6,14,4,2023-02-07 10:00:00.000 +7,1,1,2023-01-14 10:00:00.000 +7,2,1,2023-01-14 10:20:00.000 +7,3,1,2023-01-15 09:00:00.000 +7,9,3,2023-03-11 14:00:00.000 +7,10,3,2023-03-11 14:30:00.000 +9,1,1,2023-02-16 11:00:00.000 +9,12,4,2023-02-13 10:00:00.000 +9,13,4,2023-02-13 10:30:00.000 +12,1,1,2023-01-17 10:00:00.000 +12,1,1,2023-04-10 
11:00:00.000 +12,2,1,2023-01-17 10:20:00.000 +12,3,1,2023-04-10 11:30:00.000 +12,9,3,2023-03-12 10:00:00.000 +13,6,2,2023-01-20 14:00:00.000 +13,7,2,2023-01-21 10:00:00.000 +14,1,1,2023-01-09 08:00:00.000 +14,2,1,2023-01-09 08:15:00.000 +14,3,1,2023-01-10 10:00:00.000 +14,4,1,2023-01-10 10:30:00.000 +14,5,1,2023-01-11 09:00:00.000 +14,6,2,2023-01-10 14:00:00.000 +14,7,2,2023-01-11 10:00:00.000 +14,8,2,2023-01-12 11:00:00.000 diff --git a/pet-project/src/gp/courses.csv b/pet-project/src/gp/courses.csv new file mode 100644 index 0000000..7c6a3d3 --- /dev/null +++ b/pet-project/src/gp/courses.csv @@ -0,0 +1,6 @@ +id,title,category,created_at +1,SQL для начинающих,data,2023-01-01 +2,Python для анализа данных,programming,2023-01-15 +3,BI с нуля,business,2023-03-01 +4,Excel для аналитиков,business,2023-02-01 +5,Основы машинного обучения,data,2023-03-15 diff --git a/pet-project/src/gp/enrollments.csv b/pet-project/src/gp/enrollments.csv new file mode 100644 index 0000000..aea52c8 --- /dev/null +++ b/pet-project/src/gp/enrollments.csv @@ -0,0 +1,46 @@ +id,user_id,course_id,enrolled_at +3,2,1,2023-01-20 +4,3,2,2023-03-05 +5,4,3,2023-04-01 +6,6,1,2023-01-06 +7,6,2,2023-01-10 +8,6,4,2023-02-05 +9,7,1,2023-01-13 +10,7,3,2023-03-10 +11,8,1,2023-01-22 +12,8,2,2023-01-22 +13,8,3,2023-01-22 +14,9,4,2023-02-12 +15,10,1,2023-04-16 +18,13,2,2023-01-19 +19,13,4,2023-02-20 +27,16,2,2023-02-16 +28,16,4,2023-02-16 +31,18,1,2023-04-02 +32,19,1,2023-04-11 +33,19,2,2023-04-11 +37,2,3,2023-03-15 +38,3,1,2023-02-25 +39,4,2,2023-03-10 +40,6,3,2023-03-08 +41,7,2,2023-02-01 +42,9,1,2023-02-15 +45,18,4,2023-04-05 +1,1,1,2023-01-15 +2,1,2,2023-02-01 +16,11,2,2023-04-21 +17,12,1,2023-01-16 +20,14,1,2023-01-09 +21,14,2,2023-01-09 +22,14,3,2023-01-09 +23,14,4,2023-02-01 +24,14,5,2023-03-20 +25,15,1,2023-01-11 +26,15,3,2023-03-05 +29,17,3,2023-03-22 +30,17,5,2023-03-25 +34,20,2,2023-04-19 +35,5,5,2023-03-20 +36,1,4,2023-02-10 +43,12,3,2023-03-10 +44,15,2,2023-02-01 diff --git 
a/pet-project/src/gp/lesson_views.csv b/pet-project/src/gp/lesson_views.csv new file mode 100644 index 0000000..18aca4c --- /dev/null +++ b/pet-project/src/gp/lesson_views.csv @@ -0,0 +1,58 @@ +id,user_id,lesson_id,viewed_at +1,1,1,2023-01-16 10:00:00.000000 +2,1,2,2023-01-16 10:15:00.000000 +3,1,3,2023-01-17 09:00:00.000000 +4,1,4,2023-01-17 09:30:00.000000 +5,1,4,2023-01-17 11:00:00.000000 +6,1,4,2023-01-18 10:00:00.000000 +7,1,6,2023-02-02 14:00:00.000000 +8,1,12,2023-02-15 10:00:00.000000 +9,1,13,2023-02-15 10:30:00.000000 +20,5,15,2023-03-21 10:00:00.000000 +21,5,16,2023-03-22 11:00:00.000000 +42,12,1,2023-01-17 10:00:00.000000 +43,12,2,2023-01-17 10:20:00.000000 +44,12,1,2023-04-10 11:00:00.000000 +45,12,3,2023-04-10 11:30:00.000000 +46,12,9,2023-03-12 10:00:00.000000 +50,14,1,2023-01-09 08:00:00.000000 +51,14,2,2023-01-09 08:15:00.000000 +52,14,3,2023-01-10 10:00:00.000000 +53,14,4,2023-01-10 10:30:00.000000 +54,14,5,2023-01-11 09:00:00.000000 +55,14,6,2023-01-10 14:00:00.000000 +56,14,7,2023-01-11 10:00:00.000000 +57,14,8,2023-01-12 11:00:00.000000 +10,2,1,2023-01-21 09:00:00.000000 +11,2,9,2023-03-16 10:00:00.000000 +12,2,10,2023-03-16 10:30:00.000000 +13,3,6,2023-03-06 12:00:00.000000 +14,3,1,2023-02-26 11:00:00.000000 +15,3,2,2023-02-26 11:20:00.000000 +16,4,9,2023-04-02 10:00:00.000000 +17,4,10,2023-04-03 10:00:00.000000 +18,4,6,2023-03-12 14:00:00.000000 +19,4,7,2023-03-13 09:00:00.000000 +22,6,1,2023-01-07 10:00:00.000000 +23,6,2,2023-01-07 10:15:00.000000 +24,6,3,2023-01-08 09:00:00.000000 +25,6,4,2023-01-08 09:30:00.000000 +26,6,5,2023-01-09 10:00:00.000000 +27,6,6,2023-01-11 14:00:00.000000 +28,6,7,2023-01-12 10:00:00.000000 +29,6,8,2023-01-13 11:00:00.000000 +30,6,12,2023-02-06 09:00:00.000000 +31,6,13,2023-02-06 09:30:00.000000 +32,6,14,2023-02-07 10:00:00.000000 +33,6,9,2023-03-09 11:00:00.000000 +34,7,1,2023-01-14 10:00:00.000000 +35,7,2,2023-01-14 10:20:00.000000 +36,7,3,2023-01-15 09:00:00.000000 +37,7,9,2023-03-11 14:00:00.000000 
+38,7,10,2023-03-11 14:30:00.000000 +39,9,12,2023-02-13 10:00:00.000000 +40,9,13,2023-02-13 10:30:00.000000 +41,9,1,2023-02-16 11:00:00.000000 +47,13,6,2023-01-20 14:00:00.000000 +48,13,7,2023-01-21 10:00:00.000000 +49,13,12,2023-02-21 09:00:00.000000 diff --git a/pet-project/src/gp/lessons.csv b/pet-project/src/gp/lessons.csv new file mode 100644 index 0000000..217190b --- /dev/null +++ b/pet-project/src/gp/lessons.csv @@ -0,0 +1,19 @@ +id,course_id,title,duration_min +1,1,SELECT и FROM,10 +2,1,JOIN,15 +3,1,WHERE и фильтрация,12 +4,1,Агрегации и GROUP BY,18 +5,1,Подзапросы,25 +6,2,Pandas основы,20 +7,2,NumPy,15 +8,2,Визуализация данных,22 +9,3,Основы BI,12 +10,3,Метрики и дашборды,18 +11,3,Power BI введение,20 +12,4,Формулы в Excel,15 +13,4,Сводные таблицы,18 +14,4,Графики и диаграммы,14 +15,5,Введение в ML,20 +16,5,Линейная регрессия,30 +17,5,Классификация,28 +18,5,Кластеризация,25 diff --git a/pet-project/src/gp/users.csv b/pet-project/src/gp/users.csv new file mode 100644 index 0000000..c85dd0e --- /dev/null +++ b/pet-project/src/gp/users.csv @@ -0,0 +1,21 @@ +id,name,age,email,registration_date +1,Alice,25,alice@mail.com,2023-01-10 +5,Ethan,35,ethan@gmail.com,2023-03-10 +11,Kelly,23,kelly@gmail.com,2023-04-20 +12,Leo,31,leo@mail.com,2023-01-15 +14,Nina,28,nina@mail.com,2023-01-08 +15,Oscar,33,oscar@gmail.com,2023-01-10 +17,Quinn,24,quinn@gmail.com,2023-03-20 +20,Tina,25,tina@mail.com,2023-04-18 +2,Bob,30,bob@gmail.com,2023-02-05 +3,Charlie,22,charlie@mail.com,2023-02-20 +4,Diana,28,diana@mail.com,2023-03-01 +6,Frank,29,frank@mail.com,2023-01-05 +7,Grace,26,grace@gmail.com,2023-01-12 +8,Henry,32,henry@mail.com,2023-01-20 +9,Iris,24,iris@gmail.com,2023-02-10 +10,Jack,27,jack@mail.com,2023-04-15 +13,Mia,26,mia@gmail.com,2023-01-18 +16,Paul,29,paul@mail.com,2023-02-15 +18,Rita,27,rita@mail.com,2023-04-01 +19,Sam,30,sam@gmail.com,2023-04-10 diff --git a/pet-project/src/pg/courses.csv b/pet-project/src/pg/courses.csv new file mode 100644 index 0000000..7c6a3d3 --- 
/dev/null +++ b/pet-project/src/pg/courses.csv @@ -0,0 +1,6 @@ +id,title,category,created_at +1,SQL для начинающих,data,2023-01-01 +2,Python для анализа данных,programming,2023-01-15 +3,BI с нуля,business,2023-03-01 +4,Excel для аналитиков,business,2023-02-01 +5,Основы машинного обучения,data,2023-03-15 diff --git a/pet-project/src/pg/enrollments.csv b/pet-project/src/pg/enrollments.csv new file mode 100644 index 0000000..3463f1d --- /dev/null +++ b/pet-project/src/pg/enrollments.csv @@ -0,0 +1,46 @@ +id,user_id,course_id,enrolled_at +1,1,1,2023-01-15 +2,1,2,2023-02-01 +3,2,1,2023-01-20 +4,3,2,2023-03-05 +5,4,3,2023-04-01 +6,6,1,2023-01-06 +7,6,2,2023-01-10 +8,6,4,2023-02-05 +9,7,1,2023-01-13 +10,7,3,2023-03-10 +11,8,1,2023-01-22 +12,8,2,2023-01-22 +13,8,3,2023-01-22 +14,9,4,2023-02-12 +15,10,1,2023-04-16 +16,11,2,2023-04-21 +17,12,1,2023-01-16 +18,13,2,2023-01-19 +19,13,4,2023-02-20 +20,14,1,2023-01-09 +21,14,2,2023-01-09 +22,14,3,2023-01-09 +23,14,4,2023-02-01 +24,14,5,2023-03-20 +25,15,1,2023-01-11 +26,15,3,2023-03-05 +27,16,2,2023-02-16 +28,16,4,2023-02-16 +29,17,3,2023-03-22 +30,17,5,2023-03-25 +31,18,1,2023-04-02 +32,19,1,2023-04-11 +33,19,2,2023-04-11 +34,20,2,2023-04-19 +35,5,5,2023-03-20 +36,1,4,2023-02-10 +37,2,3,2023-03-15 +38,3,1,2023-02-25 +39,4,2,2023-03-10 +40,6,3,2023-03-08 +41,7,2,2023-02-01 +42,9,1,2023-02-15 +43,12,3,2023-03-10 +44,15,2,2023-02-01 +45,18,4,2023-04-05 diff --git a/pet-project/src/pg/lesson_views.csv b/pet-project/src/pg/lesson_views.csv new file mode 100644 index 0000000..e4fbbf5 --- /dev/null +++ b/pet-project/src/pg/lesson_views.csv @@ -0,0 +1,58 @@ +id,user_id,lesson_id,viewed_at +1,1,1,2023-01-16 10:00:00.000000 +2,1,2,2023-01-16 10:15:00.000000 +3,1,3,2023-01-17 09:00:00.000000 +4,1,4,2023-01-17 09:30:00.000000 +5,1,4,2023-01-17 11:00:00.000000 +6,1,4,2023-01-18 10:00:00.000000 +7,1,6,2023-02-02 14:00:00.000000 +8,1,12,2023-02-15 10:00:00.000000 +9,1,13,2023-02-15 10:30:00.000000 +10,2,1,2023-01-21 09:00:00.000000 
+11,2,9,2023-03-16 10:00:00.000000 +12,2,10,2023-03-16 10:30:00.000000 +13,3,6,2023-03-06 12:00:00.000000 +14,3,1,2023-02-26 11:00:00.000000 +15,3,2,2023-02-26 11:20:00.000000 +16,4,9,2023-04-02 10:00:00.000000 +17,4,10,2023-04-03 10:00:00.000000 +18,4,6,2023-03-12 14:00:00.000000 +19,4,7,2023-03-13 09:00:00.000000 +20,5,15,2023-03-21 10:00:00.000000 +21,5,16,2023-03-22 11:00:00.000000 +22,6,1,2023-01-07 10:00:00.000000 +23,6,2,2023-01-07 10:15:00.000000 +24,6,3,2023-01-08 09:00:00.000000 +25,6,4,2023-01-08 09:30:00.000000 +26,6,5,2023-01-09 10:00:00.000000 +27,6,6,2023-01-11 14:00:00.000000 +28,6,7,2023-01-12 10:00:00.000000 +29,6,8,2023-01-13 11:00:00.000000 +30,6,12,2023-02-06 09:00:00.000000 +31,6,13,2023-02-06 09:30:00.000000 +32,6,14,2023-02-07 10:00:00.000000 +33,6,9,2023-03-09 11:00:00.000000 +34,7,1,2023-01-14 10:00:00.000000 +35,7,2,2023-01-14 10:20:00.000000 +36,7,3,2023-01-15 09:00:00.000000 +37,7,9,2023-03-11 14:00:00.000000 +38,7,10,2023-03-11 14:30:00.000000 +39,9,12,2023-02-13 10:00:00.000000 +40,9,13,2023-02-13 10:30:00.000000 +41,9,1,2023-02-16 11:00:00.000000 +42,12,1,2023-01-17 10:00:00.000000 +43,12,2,2023-01-17 10:20:00.000000 +44,12,1,2023-04-10 11:00:00.000000 +45,12,3,2023-04-10 11:30:00.000000 +46,12,9,2023-03-12 10:00:00.000000 +47,13,6,2023-01-20 14:00:00.000000 +48,13,7,2023-01-21 10:00:00.000000 +49,13,12,2023-02-21 09:00:00.000000 +50,14,1,2023-01-09 08:00:00.000000 +51,14,2,2023-01-09 08:15:00.000000 +52,14,3,2023-01-10 10:00:00.000000 +53,14,4,2023-01-10 10:30:00.000000 +54,14,5,2023-01-11 09:00:00.000000 +55,14,6,2023-01-10 14:00:00.000000 +56,14,7,2023-01-11 10:00:00.000000 +57,14,8,2023-01-12 11:00:00.000000 diff --git a/pet-project/src/pg/lessons.csv b/pet-project/src/pg/lessons.csv new file mode 100644 index 0000000..217190b --- /dev/null +++ b/pet-project/src/pg/lessons.csv @@ -0,0 +1,19 @@ +id,course_id,title,duration_min +1,1,SELECT и FROM,10 +2,1,JOIN,15 +3,1,WHERE и фильтрация,12 +4,1,Агрегации и GROUP BY,18 
+5,1,Подзапросы,25 +6,2,Pandas основы,20 +7,2,NumPy,15 +8,2,Визуализация данных,22 +9,3,Основы BI,12 +10,3,Метрики и дашборды,18 +11,3,Power BI введение,20 +12,4,Формулы в Excel,15 +13,4,Сводные таблицы,18 +14,4,Графики и диаграммы,14 +15,5,Введение в ML,20 +16,5,Линейная регрессия,30 +17,5,Классификация,28 +18,5,Кластеризация,25 diff --git a/pet-project/src/pg/users.csv b/pet-project/src/pg/users.csv new file mode 100644 index 0000000..2a3eea9 --- /dev/null +++ b/pet-project/src/pg/users.csv @@ -0,0 +1,21 @@ +id,name,age,email,registration_date +1,Alice,25,alice@mail.com,2023-01-10 +2,Bob,30,bob@gmail.com,2023-02-05 +3,Charlie,22,charlie@mail.com,2023-02-20 +4,Diana,28,diana@mail.com,2023-03-01 +5,Ethan,35,ethan@gmail.com,2023-03-10 +6,Frank,29,frank@mail.com,2023-01-05 +7,Grace,26,grace@gmail.com,2023-01-12 +8,Henry,32,henry@mail.com,2023-01-20 +9,Iris,24,iris@gmail.com,2023-02-10 +10,Jack,27,jack@mail.com,2023-04-15 +11,Kelly,23,kelly@gmail.com,2023-04-20 +12,Leo,31,leo@mail.com,2023-01-15 +13,Mia,26,mia@gmail.com,2023-01-18 +14,Nina,28,nina@mail.com,2023-01-08 +15,Oscar,33,oscar@gmail.com,2023-01-10 +16,Paul,29,paul@mail.com,2023-02-15 +17,Quinn,24,quinn@gmail.com,2023-03-20 +18,Rita,27,rita@mail.com,2023-04-01 +19,Sam,30,sam@gmail.com,2023-04-10 +20,Tina,25,tina@mail.com,2023-04-18