Files
spark/homework/task1.ipynb
2026-06-07 13:48:55 +07:00

84 lines
2.4 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f62f45bf-8549-4d60-b90f-270ed1c142b5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting default log level to \"WARN\".\n",
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
"26/06/06 09:07:09 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Активные Spark сессии: http://5e7ed2c4667c:4040\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"26/06/06 09:07:29 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n"
]
}
],
"source": [
"from pyspark.sql import SparkSession\n",
"\n",
"spark = SparkSession.builder.appName(\"test_pyspark_local\").getOrCreate()\n",
"print(\"Активные Spark сессии:\", spark.sparkContext.uiWebUrl)\n",
"\n",
"# 1\n",
"logs_rdd = spark.sparkContext.textFile(\"logs.txt\")\n",
"# 2 - transformation\n",
"errors_rdd = logs_rdd.filter(lambda line: \"ERROR\" in line)\n",
"# 3 - transformation\n",
"erros_upper_rdd = errors_rdd.map(lambda line: line.upper())\n",
"# 4 - transformation\n",
"error_count = erros_upper_rdd.count()\n",
"# 5 - action\n",
"erros_upper_rdd.saveAsTextFile(\"errors_upper.txt\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d95abd88-700e-4615-8a7a-98a4f9b4877d",
"metadata": {},
"outputs": [],
"source": [
"df = spark.read.text(\"logs.txt\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}