84 lines
2.4 KiB
Plaintext
84 lines
2.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "f62f45bf-8549-4d60-b90f-270ed1c142b5",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Setting default log level to \"WARN\".\n",
|
|
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
|
|
"26/06/06 09:07:09 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Активные Spark сессии: http://5e7ed2c4667c:4040\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"26/06/06 09:07:29 WARN GarbageCollectionMetrics: To enable non-built-in garbage collector(s) List(G1 Concurrent GC), users should configure it(them) to spark.eventLog.gcMetrics.youngGenerationGarbageCollectors or spark.eventLog.gcMetrics.oldGenerationGarbageCollectors\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from pyspark.sql import SparkSession\n",
|
|
"\n",
|
|
"spark = SparkSession.builder.appName(\"test_pyspark_local\").getOrCreate()\n",
|
|
"print(\"Активные Spark сессии:\", spark.sparkContext.uiWebUrl)\n",
|
|
"\n",
|
|
"# 1 - create the RDD from the log file\n",
|
|
"logs_rdd = spark.sparkContext.textFile(\"logs.txt\")\n",
|
|
"# 2 - transformation\n",
|
|
"errors_rdd = logs_rdd.filter(lambda line: \"ERROR\" in line)\n",
|
|
"# 3 - transformation\n",
|
|
"erros_upper_rdd = errors_rdd.map(lambda line: line.upper())\n",
|
|
"# 4 - action\n",
|
|
"error_count = erros_upper_rdd.count()\n",
|
|
"# 5 - action\n",
|
|
"erros_upper_rdd.saveAsTextFile(\"errors_upper.txt\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "d95abd88-700e-4615-8a7a-98a4f9b4877d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = spark.read.text(\"logs.txt\")\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|