Ubuntu datasophon1.2.1 二开之一:创建datasophon-worker 服务失败

Ubuntu datasophon1.2.1 二开之一:创建datasophon-worker 服务失败

背景

有网友需要在 Ubuntu 上安装大数据平台,要求如下:

1、需要 hadoop 生态:hadoop、spark、flink、ClickHouse/StarRocks 等

2、完善的监控

3、最好有统一的运维管理系统,类似CDH那种可视化的最好

比较了好几款开源工具,要么就是半开源,要么好久没更新。最后还是选择

datasophon ,虽然也两年没更新了,但是全部开源,有问题可以二开。

况且前段时间还在研究麒麟下安装datasophon,相对熟悉点。

问题

然后开始安装时,在创建集群节点时,就遇到了创建datasophon-worker服务失败问题。

这个问题,我在搞麒麟版时也遇到过:不同 Linux 发行版对 datasophon-worker 服务描述文件(systemd 的 .service 文件)的要求有些差别。

我是基于麒麟版本修改,直接让AI帮转换一下,就ok

解决

其实就改一个文件:StartWorkerHandler.java

java 复制代码
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.datasophon.api.master.handler.host;

import com.datasophon.api.load.ConfigBean;
import com.datasophon.api.utils.CommonUtils;
import com.datasophon.api.utils.MessageResolverUtils;
import com.datasophon.api.utils.MinaUtils;
import com.datasophon.api.utils.SpringTool;
import com.datasophon.common.Constants;
import com.datasophon.common.enums.InstallState;
import com.datasophon.common.model.HostInfo;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.sshd.client.session.ClientSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Date;

/**
 * Dispatcher step that configures and starts the DataSophon worker agent on a remote host.
 *
 * <p>The handler rewrites the worker's common.properties through the SSH session, uploads a
 * systemd unit file ({@code datasophon-worker.service}) generated on the API server, and then
 * enables and starts that unit on the target host.
 *
 * <p>The unit file is generated once per JVM and its local path is cached in a static field;
 * creation is guarded by a lock so that concurrent host installations do not write the same
 * temp file at the same time.
 */
public class StartWorkerHandler implements DispatcherWorkerHandler {

    private static final Logger logger = LoggerFactory.getLogger(StartWorkerHandler.class);

    /** Local file name of the generated unit; it is uploaded under this name. */
    private static final String SERVICE_FILE_NAME = "datasophon-worker.service";

    /**
     * Content of the systemd unit written for the worker.
     *
     * <p>NOTE(review): the JDK location and the /opt/datasophon install prefix are hard-coded
     * here, independent of {@code Constants.INSTALL_PATH} - confirm they always match.
     * NOTE(review): {@code ProtectSystem=full} makes /usr, /boot and /etc read-only for the
     * service process - confirm the worker never needs to write there.
     */
    private static final String SERVICE_UNIT_CONTENT =
            "[Unit]\n" +
            "Description=DataSophon Worker Service (Ubuntu)\n" +
            "After=network.target network-online.target\n" +
            "Wants=network-online.target\n" +
            "\n" +
            "[Service]\n" +
            "Type=forking\n" +
            "Environment=JAVA_HOME=/usr/local/jdk1.8.0_333\n" +
            "Environment=PATH=/usr/local/jdk1.8.0_333/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin\n" +
            "Environment=LC_ALL=C.UTF-8\n" +
            "WorkingDirectory=/opt/datasophon/datasophon-worker\n" +
            "ExecStart=/opt/datasophon/datasophon-worker/bin/datasophon-worker.sh start worker\n" +
            "ExecStop=/opt/datasophon/datasophon-worker/bin/datasophon-worker.sh stop worker\n" +
            "ExecReload=/opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker\n" +
            "User=root\n" +
            "Group=root\n" +
            "TimeoutStopSec=300\n" +
            "KillMode=process\n" +
            "Restart=on-failure\n" +
            "RestartSec=10\n" +
            "StandardOutput=journal\n" +
            "StandardError=journal\n" +
            "SyslogIdentifier=datasophon-worker\n" +
            "\n" +
            "ProtectSystem=full\n" +
            "ReadWritePaths=/opt/datasophon/datasophon-worker/logs\n" +
            "\n" +
            "[Install]\n" +
            "WantedBy=multi-user.target";

    /** Guards creation of the shared unit temp file. */
    private static final Object lock = new Object();

    /** Path of the unit file generated by this JVM, or {@code null} if none was created yet. */
    private static volatile String cachedServiceFilePath = null;

    private final Integer clusterId;

    private final String clusterFrame;

    /**
     * @param clusterId    id of the cluster the host is being added to
     * @param clusterFrame cluster frame value passed to the common.properties update command
     */
    public StartWorkerHandler(Integer clusterId, String clusterFrame) {
        this.clusterId = clusterId;
        this.clusterFrame = clusterFrame;
    }

    /**
     * Updates the worker configuration on the remote host, installs the systemd unit and
     * starts the worker service.
     *
     * @param session  SSH session to the target host
     * @param hostInfo install-progress record of the target host; updated in place
     * @return {@code true} when all steps were issued; {@code false} when the
     *         common.properties update or the unit-file upload failed (the host is then
     *         marked {@link InstallState#FAILED})
     * @throws UnknownHostException if the local host name cannot be resolved
     * @throws RuntimeException     if the unit file cannot be generated locally
     */
    @Override
    public boolean handle(ClientSession session, HostInfo hostInfo) throws UnknownHostException {
        ConfigBean configBean = SpringTool.getApplicationContext().getBean(ConfigBean.class);
        String installPath = Constants.INSTALL_PATH;
        String localHostName = InetAddress.getLocalHost().getHostName();
        String command = Constants.UPDATE_COMMON_CMD +
                localHostName +
                Constants.SPACE +
                configBean.getServerPort() +
                Constants.SPACE +
                this.clusterFrame +
                Constants.SPACE +
                this.clusterId +
                Constants.SPACE +
                installPath;
        String updateCommonPropertiesResult = MinaUtils.execCmdWithResult(session, command);
        if (StringUtils.isBlank(updateCommonPropertiesResult) || "failed".equals(updateCommonPropertiesResult)) {
            logger.error("common.properties update failed,command:{}", command);
            hostInfo.setErrMsg("common.properties update failed");
            hostInfo.setMessage(MessageResolverUtils.getMessage("modify.configuration.file.fail"));
            CommonUtils.updateInstallState(InstallState.FAILED, hostInfo);
            // Report the failure to the caller, consistent with the upload-failure path below.
            return false;
        }

        // Stop a worker left over from a previous installation before touching its files.
        MinaUtils.execCmdWithResult(session, "systemctl stop datasophon-worker.service");
        // NOTE(review): if each execCmdWithResult call runs in its own shell, this ulimit
        // does not outlive the call - confirm against MinaUtils.
        MinaUtils.execCmdWithResult(session, "ulimit -n 102400");
        MinaUtils.execCmdWithResult(session, "sysctl -w vm.max_map_count=2000000");

        // Legacy init script; the leading backslash bypasses any shell alias of cp.
        MinaUtils.execCmdWithResult(session,
                "\\cp " + installPath + "/datasophon-worker/script/datasophon-worker /etc/init.d/");
        MinaUtils.execCmdWithResult(session, "chmod +x /etc/init.d/datasophon-worker");

        // Generate the systemd unit locally and upload it to the host.
        String serviceFile = makeDWorkerServiceTempFile();
        boolean uploaded = MinaUtils.uploadFile(session, "/usr/lib/systemd/system", serviceFile);
        if (!uploaded) {
            logger.error("upload datasophon-worker.service failed");
            hostInfo.setErrMsg("upload datasophon-worker.service failed");
            hostInfo.setMessage(MessageResolverUtils.getMessage("upload.file.fail"));
            CommonUtils.updateInstallState(InstallState.FAILED, hostInfo);
            return false;
        }

        MinaUtils.execCmdWithResult(session,
                "\\cp " + installPath + "/datasophon-worker/script/datasophon-env.sh /etc/profile.d/");
        // NOTE(review): same caveat as ulimit above - sourcing only affects the shell it runs in.
        MinaUtils.execCmdWithResult(session, "source /etc/profile.d/datasophon-env.sh");
        hostInfo.setMessage(MessageResolverUtils.getMessage("start.host.management.agent"));

        // Remove the init.d script again so that only the systemd unit remains installed.
        MinaUtils.execCmdWithResult(session, "rm -f /etc/init.d/datasophon-worker 2>/dev/null");
        // The unit lists this directory in ReadWritePaths, so create it before starting.
        MinaUtils.execCmdWithResult(session, "mkdir -p /opt/datasophon/datasophon-worker/logs");
        // Make systemd pick up the new unit, register it for boot, then start it.
        MinaUtils.execCmdWithResult(session, "systemctl daemon-reload");
        MinaUtils.execCmdWithResult(session, "systemctl enable datasophon-worker.service");
        MinaUtils.execCmdWithResult(session, "systemctl start datasophon-worker.service");
        hostInfo.setProgress(75);
        hostInfo.setCreateTime(new Date());

        logger.info("end dispatcher host agent :{}", hostInfo.getHostname());
        return true;
    }

    /**
     * Returns the local path of the generated unit file, creating the file on first use.
     *
     * <p>The cached path is reused only while the file still exists; if it has been removed
     * (for example by temp-directory cleanup) the file is generated again.
     *
     * @return path of the local {@code datasophon-worker.service} file
     * @throws RuntimeException if the file cannot be created
     */
    private String makeDWorkerServiceTempFile() {
        // Fast path without taking the lock.
        String cached = existingCachedServiceFile();
        if (cached != null) {
            logger.info("使用缓存的服务文件: {}", cached);
            return cached;
        }

        final String methodName = "makeDWorkerServiceTempFile";
        logger.info("【开始】执行 {} 方法", methodName);
        synchronized (lock) {
            // Re-check under the lock: another thread may have created the file meanwhile.
            cached = existingCachedServiceFile();
            if (cached != null) {
                logger.info("使用缓存的服务文件: {}", cached);
                return cached;
            }
            logger.info("服务文件内容创建完成,内容为: {} ", SERVICE_UNIT_CONTENT);

            try {
                Path tempDirPath = resolveWritableTempDir();
                Path tempFilePath = writeServiceFile(tempDirPath);

                cachedServiceFilePath = tempFilePath.toString();
                logger.info("创建服务文件: {}", cachedServiceFilePath);
                return cachedServiceFilePath;
            } catch (Exception e) {
                // The logger call already records the full stack trace.
                logger.error("【失败】{} 方法执行异常", methodName, e);
                throw new RuntimeException("Failed to create service temp file: " + e.getMessage(), e);
            }
        }
    }

    /**
     * Returns the cached unit-file path if it is set and still points to a regular file,
     * otherwise {@code null}.
     */
    private static String existingCachedServiceFile() {
        String cached = cachedServiceFilePath;
        if (cached == null) {
            return null;
        }
        if (Files.isRegularFile(Paths.get(cached))) {
            return cached;
        }
        logger.warn("cached service file {} no longer exists, it will be regenerated", cached);
        return null;
    }

    /**
     * Resolves the JVM temp directory ({@code java.io.tmpdir}), creating it if missing, and
     * verifies that it is writable.
     *
     * @return the temp directory
     * @throws java.io.IOException if owner or permission details cannot be read while
     *                             reporting a non-writable directory
     * @throws RuntimeException    if the property is unset, the directory cannot be created,
     *                             or it is not writable
     */
    private static Path resolveWritableTempDir() throws java.io.IOException {
        String tempDirProperty = System.getProperty("java.io.tmpdir");
        logger.info("1. java.io.tmpdir = '{}'", tempDirProperty);

        if (tempDirProperty == null || tempDirProperty.trim().isEmpty()) {
            logger.error("java.io.tmpdir 系统属性为空或null");
            throw new RuntimeException("java.io.tmpdir system property is null or empty");
        }

        Path tempDirPath = Paths.get(tempDirProperty);
        if (Files.exists(tempDirPath)) {
            logger.info("3. 临时目录已存在");
        } else {
            logger.info("3. 临时目录不存在,尝试创建...");
            try {
                Files.createDirectories(tempDirPath);
                logger.info("   临时目录创建成功");
            } catch (Exception e) {
                logger.error("   创建临时目录失败", e);
                throw new RuntimeException("Failed to create temp directory: " + tempDirPath, e);
            }
        }

        logger.info("4. 检查目录权限...");
        boolean isWritable = Files.isWritable(tempDirPath);
        logger.info("   Files.isWritable(tempDirPath) = {}", isWritable);
        logger.info("   tempDirPath.toFile().canWrite() = {}", tempDirPath.toFile().canWrite());

        if (!isWritable) {
            // Log enough detail to diagnose the permission problem from the server log alone.
            logger.error("临时目录不可写,详细诊断:");
            logger.error("   目录路径: {}", tempDirPath.toAbsolutePath());
            logger.error("   目录所有者: {}", Files.getOwner(tempDirPath));
            logger.error("   目录权限: {}", Files.getPosixFilePermissions(tempDirPath));
            logger.error("   当前用户: {}", System.getProperty("user.name"));
            logger.error("   当前用户主目录: {}", System.getProperty("user.home"));

            throw new RuntimeException("No write permission to temp directory: " + tempDirPath);
        }
        return tempDirPath;
    }

    /**
     * Writes the unit content to {@code datasophon-worker.service} inside the given directory,
     * replacing any existing file, and verifies the result is present and non-empty.
     *
     * @param tempDirPath directory to create the file in
     * @return path of the written file
     * @throws java.io.IOException if writing or inspecting the file fails
     * @throws RuntimeException    if the file is missing or empty after the write
     */
    private static Path writeServiceFile(Path tempDirPath) throws java.io.IOException {
        logger.info("5. 创建临时文件...");
        Path tempFilePath = tempDirPath.resolve(SERVICE_FILE_NAME);
        logger.info("   目标文件路径: {}", tempFilePath.toAbsolutePath());

        if (Files.exists(tempFilePath)) {
            logger.warn("   文件已存在,将被覆盖");
            try {
                Files.delete(tempFilePath);
                logger.info("   已删除旧文件");
            } catch (Exception e) {
                // Best effort: Files.write below truncates an existing file anyway.
                logger.warn("   删除旧文件失败,继续尝试覆盖", e);
            }
        }

        Files.write(tempFilePath, SERVICE_UNIT_CONTENT.getBytes(StandardCharsets.UTF_8));

        if (!Files.exists(tempFilePath)) {
            logger.error("文件创建后不存在,写入可能失败");
            throw new RuntimeException("File not created after write operation");
        }
        if (Files.size(tempFilePath) == 0) {
            logger.error("创建的文件大小为0,内容可能未写入");
            throw new RuntimeException("Created file is empty");
        }
        return tempFilePath;
    }

}

重点是:handle和makeDWorkerServiceTempFile方法。特别是makeDWorkerServiceTempFile。

增加功能:1. 解决并发问题:多个线程同时创建服务配置文件时,用双检锁保证文件只创建一次

2.多次重装服务,先删除原来文件,停止原来服务,再安装。

3.ubuntu 服务特色配置

最后

这个问题,看结果挺简单,但是过程也有点长。

如需沟通:lita2lz

相关推荐
码农12138号2 小时前
Bugku HackINI 2022 Whois 详解
linux·web安全·ctf·命令执行·bugku·换行符
Joren的学习记录2 小时前
【Linux运维进阶知识】Nginx负载均衡
linux·运维·nginx
刘一说2 小时前
时空大数据与AI融合:重塑物理世界的智能中枢
大数据·人工智能·gis
用户2190326527352 小时前
Java后端必须的Docker 部署 Redis 集群完整指南
linux·后端
胡先生不姓胡2 小时前
如何获取跨系统调用的函数调用栈
linux
GIS数据转换器3 小时前
综合安防数智管理平台
大数据·网络·人工智能·安全·无人机
Jtti3 小时前
服务器防御SYN Flood攻击的方法
运维·服务器
2501_941982053 小时前
RPA 的跨平台部署与统一自动化策略
运维·自动化·rpa
b***25113 小时前
电池自动分选机:精密分选保障新能源产业质量核心
运维·自动化·制造
数数科技的数据干货3 小时前
游戏流失分析:一套经实战检验的「流程化操作指南」
大数据·运维·人工智能·游戏