spark-sql 参数配置与调优

WBerica 2024-10-13 15:16

-- Session settings: Hive dynamic-partition limits, then the Spark execution engine.
set hive.exec.dynamic.partition=true; -- enable dynamic partitioning

set hive.exec.dynamic.partition.mode=nonstrict; -- allow every partition column to be dynamic

set hive.exec.max.dynamic.partitions.pernode=1000; -- maximum number of dynamic partitions each mapper/reducer may create

-- Optional, may be omitted: set hive.exec.dynamic.partitions=10000
-- NOTE(review): Hive's overall limit appears to be named hive.exec.max.dynamic.partitions; confirm the key before enabling this line.

set hive.execution.engine=spark; -- select Spark as the Hive execution engine

set spark.master=yarn-cluster; -- submit to YARN in cluster mode

set spark.app.name=${name}; -- application name; ${name} is a placeholder, presumably substituted by the caller (verify)

set spark.sql.crossJoin.enabled=true; -- permit cross (Cartesian-product) joins

spark.sql.decimalOperations.allowPrecisionLoss 默认是 true，

这里设置成 false：

-- Each statement ends with ';' and uses '--' comments so that the comment text
-- is never parsed as part of the configuration value.
set spark.sql.decimalOperations.allowPrecisionLoss=false; -- do not allow decimal operations to lose precision

set spark.dynamicAllocation.enabled=true; -- turn on dynamic resource allocation

set spark.dynamicAllocation.minExecutors=1; -- minimum number of executors allocated per application

set spark.dynamicAllocation.maxExecutors=20; -- maximum number of executors allocated concurrently per application

spark.sql.autoBroadcastJoinThreshold 默认是开启的，默认阈值为 10 * 1024 * 1024 字节（10 MB）

禁用广播 join 时设置为 -1，例如：set spark.sql.autoBroadcastJoinThreshold=-1;

set spark.executor.memory=8G; -- memory allotted to each executor

set spark.driver.maxResultSize=2G; -- cap on the size of results collected back to the driver

spark.default.parallelism 只对 RDD 操作有效，对 Spark SQL 不生效；

Spark SQL 的 shuffle 分区数由 spark.sql.shuffle.partitions 控制（默认 200）。

-- Executor sizing: count, cores per executor, and off-heap overhead on YARN.
-- NOTE(review): 25 instances is above spark.dynamicAllocation.maxExecutors (20) set earlier in this file while dynamic allocation is enabled; confirm which value is intended.
set spark.executor.instances=25; -- number of executors requested

set spark.executor.cores=4; -- cores per executor

-- NOTE(review): the value has no unit (presumably MiB), and newer Spark releases name this key spark.executor.memoryOverhead; confirm against the cluster's Spark version.
set spark.yarn.executor.memoryOverhead=2048; -- extra memory per executor on top of spark.executor.memory