I searched for this error but couldn't find any information on how to fix it. This is what I get when I run the two scripts below:
org.apache.arrow.memory.OutOfMemoryException: Failure while allocating memory.
write.py
import pandas as pd
from pyspark.sql import SparkSession
from os.path import abspath

warehouse_location = abspath('spark-warehouse')
booksPD = pd.read_csv('books.csv')

spark = SparkSession.builder \
    .appName("MyApp") \
    .master("local[*]") \
    .config("spark.sql.execution.arrow.enabled", "true") \
    .config("spark.driver.maxResultSize", "16g") \
    .config("spark.python.worker.memory", "16g") \
    .config("spark.sql.warehouse.dir", warehouse_location) \
    .enableHiveSupport() \
    .getOrCreate()
spark.sparkContext.setLogLevel("WARN")

spark.createDataFrame(booksPD).write.saveAsTable("books")
spark.catalog.clearCache()
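As far as I can tell, spark.sql.execution.arrow.enabled only affects the pandas conversions, so the Arrow allocation in this script should happen inside createDataFrame(booksPD). For reference, here is a minimal sketch that writes the same table without going through pandas at all (assuming books.csv has a header row; I have not verified whether this avoids the allocation failure):

spark.read.csv('books.csv', header=True, inferSchema=True) \
    .write.saveAsTable("books")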
read.py
from pyspark.sql import SparkSession
from os.path import abspath

warehouse_location = abspath('spark-warehouse')

spark = SparkSession.builder \
    .appName("MyApp") \
    .master("local[*]") \
    .config("spark.sql.execution.arrow.enabled", "true") \
    .config("spark.driver.maxResultSize", "16g") \
    .config("spark.python.worker.memory", "16g") \
    .config("spark.sql.warehouse.dir", warehouse_location) \
    .enableHiveSupport() \
    .getOrCreate()
spark.sparkContext.setLogLevel("WARN")

books = spark.sql("SELECT * FROM books").toPandas()
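Likewise, toPandas() is the only Arrow-dependent call in the read script. A minimal sketch of falling back to the non-Arrow conversion path (an assumption on my part, not a verified fix) would be:

# spark.sql.execution.arrow.enabled is a runtime SQL conf, so it can be
# toggled on an existing session just for this conversion
spark.conf.set("spark.sql.execution.arrow.enabled", "false")
books = spark.sql("SELECT * FROM books").toPandas()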