{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f62f45bf-8549-4d60-b90f-270ed1c142b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "26/06/06 09:07:09 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Активные Spark сессии: http://5e7ed2c4667c:4040\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "26/06/06 09:07:29 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n"
     ]
    }
   ],
   "source": [
    "import shutil\n",
    "\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "# Input log file and output directory used by this notebook.\n",
    "LOGS_PATH = \"logs.txt\"\n",
    "ERRORS_OUTPUT_PATH = \"errors_upper.txt\"\n",
    "\n",
    "# Reuse the running session if one exists; otherwise start a new one.\n",
    "spark = SparkSession.builder.appName(\"test_pyspark_local\").getOrCreate()\n",
    "print(\"Активные Spark сессии:\", spark.sparkContext.uiWebUrl)\n",
    "\n",
    "# 1 - source: define an RDD with one element per line of the log file\n",
    "logs_rdd = spark.sparkContext.textFile(LOGS_PATH)\n",
    "# 2 - transformation (lazy): keep only the lines that contain \"ERROR\"\n",
    "errors_rdd = logs_rdd.filter(lambda line: \"ERROR\" in line)\n",
    "# 3 - transformation (lazy): upper-case every remaining line.\n",
    "# Two actions run on this RDD below, so cache it to avoid reading and\n",
    "# filtering the file twice.\n",
    "errors_upper_rdd = errors_rdd.map(lambda line: line.upper()).cache()\n",
    "# 4 - action: triggers the first job and returns the number of error lines\n",
    "error_count = errors_upper_rdd.count()\n",
    "# 5 - action: writes the lines as part files under a directory with this name.\n",
    "# saveAsTextFile fails if the target already exists, so remove the output of a\n",
    "# previous run first (assumes the path is on the local filesystem - TODO confirm).\n",
    "shutil.rmtree(ERRORS_OUTPUT_PATH, ignore_errors=True)\n",
    "errors_upper_rdd.saveAsTextFile(ERRORS_OUTPUT_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d95abd88-700e-4615-8a7a-98a4f9b4877d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the same log file through the DataFrame API; spark.read.text yields\n",
    "# one row per line in a single string column named \"value\".\n",
    "logs_df = spark.read.text(LOGS_PATH)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}