启动spark历史服务失败问题处理

1.场景

执行 start-history-server.sh 启动 Spark 历史服务器后,服务未能正常启动,日志中出现报错

root@manager file# $SPARK_HOME/sbin/start-history-server.sh

starting org.apache.spark.deploy.history.HistoryServer, logging to /home/bigdata/spark/logs/spark-root-org.apache.spark.deploy.history.HistoryServer-1-manager.out

root@manager file# more /home/bigdata/spark/logs/spark-root-org.apache.spark.deploy.history.HistoryServer-1-manager.out

Spark Command: /usr/java/jdk1.8.0_112/bin/java -cp /home/bigdata/spark/conf/:/home/bigdata/spark/jars/*:/home/bigdata/hadoop/e

tc/hadoop/ -Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=zook_node1:2181,zook_node2:2181,zook_node3:2181

-Dspark.deploy.zookeeper.dir=/home/bigdata/zookeeper -Xmx1g org.apache.spark.deploy.history.HistoryServer

========================================

2023-09-18 16:46:29,971 INFO history.HistoryServer: Started daemon with process name: 18867@manager

2023-09-18 16:46:29,974 INFO util.SignalUtils: Registering signal handler for TERM

2023-09-18 16:46:29,975 INFO util.SignalUtils: Registering signal handler for HUP

2023-09-18 16:46:29,975 INFO util.SignalUtils: Registering signal handler for INT

2023-09-18 16:46:30,192 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-ja

va classes where applicable

2023-09-18 16:46:30,271 INFO spark.SecurityManager: Changing view acls to: root

2023-09-18 16:46:30,272 INFO spark.SecurityManager: Changing modify acls to: root

2023-09-18 16:46:30,273 INFO spark.SecurityManager: Changing view acls groups to:

2023-09-18 16:46:30,273 INFO spark.SecurityManager: Changing modify acls groups to:

2023-09-18 16:46:30,274 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with vi

ew permissions: Set(root); groups with view permissions: Set(); users with modify permissions: Set(root); groups with modify

permissions: Set()

2023-09-18 16:46:30,360 INFO history.FsHistoryProvider: History server ui acls disabled; users with admin permissions: ; group

s with admin permissions:

2023-09-18 16:46:31,011 INFO util.log: Logging initialized @1850ms to org.sparkproject.jetty.util.log.Slf4jLog

2023-09-18 16:46:31,062 INFO server.Server: jetty-9.4.44.v20210927; built: 2021-09-27T23:02:44.612Z; git: 8da83308eeca865e495e

53ef315a249d63ba9332; jvm 1.8.0_112-b15

2023-09-18 16:46:31,084 INFO server.Server: Started @1924ms

2023-09-18 16:46:31,130 INFO server.AbstractConnector: Started ServerConnector@5ae76500{HTTP/1.1, (http/1.1)}{0.0.0.0:18080}

2023-09-18 16:46:31,131 INFO util.Utils: Successfully started service 'HistoryServerUI' on port 18080.

2023-09-18 16:46:31,156 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7e094740{/,null,AVAILABLE,@Spark}

2023-09-18 16:46:31,157 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@24a1c17f{/json,null,AVAILABLE,@Spar

k}

2023-09-18 16:46:31,158 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@73511076{/api,null,AVAILABLE,@Spark

}

2023-09-18 16:46:31,167 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@514eedd8{/static,null,AVAILABLE,@Sp

ark}

2023-09-18 16:46:31,168 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1a2e2935{/history,null,AVAILABLE,@S

park}

2023-09-18 16:46:31,170 INFO history.HistoryServer: Bound HistoryServer to 0.0.0.0, and started at http://manager:18080

Exception in thread "main" java.io.FileNotFoundException: Log directory specified does not exist: hdfs://mycluster/spark_logs

at org.apache.spark.deploy.history.FsHistoryProvider.startPolling(FsHistoryProvider.scala:291)

at org.apache.spark.deploy.history.FsHistoryProvider.initialize(FsHistoryProvider.scala:236)

at org.apache.spark.deploy.history.FsHistoryProvider.start(FsHistoryProvider.scala:413)

at org.apache.spark.deploy.history.HistoryServer$.main(HistoryServer.scala:311)

at org.apache.spark.deploy.history.HistoryServer.main(HistoryServer.scala)

Caused by: java.io.FileNotFoundException: File does not exist: hdfs://mycluster/spark_logs

at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1756)

at org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1749)

at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)

at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1764)

at org.apache.spark.deploy.history.FsHistoryProvider.startPolling(FsHistoryProvider.scala:281)

... 4 more

root@manager file#

2.日志分析

关键是这一句:Log directory specified does not exist: hdfs://mycluster/spark_logs

意思是:历史服务器配置的事件日志目录 hdfs://mycluster/spark_logs 在 HDFS 上不存在

3.解决方案

需要预先在 HDFS 上创建该目录(即 spark.history.fs.logDirectory 所指向的路径),并赋予写入权限即可

root@manager conf# hdfs dfs -mkdir -p hdfs://mycluster/spark_logs

root@manager conf# hdfs dfs -chmod 777 hdfs://mycluster/spark_logs

4.重新启动验证

root@manager file# $SPARK_HOME/sbin/stop-history-server.sh

stopping org.apache.spark.deploy.history.HistoryServer

root@manager file# $SPARK_HOME/sbin/start-history-server.sh

starting org.apache.spark.deploy.history.HistoryServer, logging to /home/bigdata/spark/logs/spark-root-org.apache.spark.deploy.history.HistoryServer-1-manager.out

root@manager file# tail -f /home/bigdata/spark/logs/spark-root-org.apache.spark.deploy.history.HistoryServer-1-manager.out

2023-09-18 17:10:07,780 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@56102e1c{/api,null,AVAILABLE,@Spark}

2023-09-18 17:10:07,789 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@51f49060{/static,null,AVAILABLE,@Spark}

2023-09-18 17:10:07,790 INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@687a762c{/history,null,AVAILABLE,@Spark}

2023-09-18 17:10:07,791 INFO history.HistoryServer: Bound HistoryServer to 0.0.0.0, and started at http://manager:18080

2023-09-18 17:10:07,952 INFO history.FsHistoryProvider: Parsing hdfs://mycluster/spark_logs/local-1695027842090.zstd for listing data...

2023-09-18 17:10:07,952 INFO history.FsHistoryProvider: Parsing hdfs://mycluster/spark_logs/local-1695027960132.zstd for listing data...

2023-09-18 17:10:08,694 INFO history.FsHistoryProvider: Finished parsing hdfs://mycluster/spark_logs/local-1695027842090.zstd

2023-09-18 17:10:08,694 INFO history.FsHistoryProvider: Finished parsing hdfs://mycluster/spark_logs/local-1695027960132.zstd

2023-09-18 17:10:08,700 INFO history.FsHistoryProvider: Parsing hdfs://mycluster/spark_logs/local-1695027671578.zstd.inprogress for listing data...

2023-09-18 17:10:08,863 INFO history.FsHistoryProvider: Finished parsing hdfs://mycluster/spark_logs/local-1695027671578.zstd.inprogress

相关推荐
阿坤带你走近大数据5 分钟前
实时数据开发的一些实战经验
大数据
木雷坞7 分钟前
LiteLLM Docker 部署:config.yaml、Master Key 和 Postgres 配置
运维·docker·容器·litellm
AC赳赳老秦9 分钟前
OpenClaw + 阿里云 OSS 自动化:批量上传下载文件、自动备份本地数据到云端
运维·数据库·阿里云·自动化·云计算·deepseek·openclaw
数智化管理手记9 分钟前
三步轻量化落地法!精益赋能数字化,让工厂转型告别形式化
运维·数据库·人工智能·精益工程
无忧智库14 分钟前
[特殊字符]【万字深度解析】一站式全域数据资产运营平台解决方案——企业数字化转型的数据治理终极答案(PPT)
大数据·人工智能
七夜zippoe17 分钟前
DolphinDB MQTT协议接入:工业设备数据采集
运维·mqtt·dolphindb·工业设备·协议接入
Championship.23.2418 分钟前
Linux 3.0 串口机制深度解析:传统8250驱动与基础RS-232/485支持
linux·运维·服务器
AI智图坊26 分钟前
亚马逊多站点Listing视觉制作的效率瓶颈与AI解决方案:GPT-Image-2与Nano Banana Pro双模型分析
大数据·前端·数据库·人工智能·自动化·aigc
王小王-12326 分钟前
基于机器学习的垃圾短信检测研究
大数据·人工智能·机器学习·垃圾短信检测·垃圾短信识别
r-t-H36 分钟前
Docker进阶与容器编排实践-第三章
运维·docker·容器