Ubuntu datasophon1.2.1 二开之一:创建datasophon-worker 服务失败
背景
有网友需要在 Ubuntu 上安装大数据平台,要求如下:
1、需要 Hadoop 生态:Hadoop、Spark、Flink、ClickHouse/StarRocks 等
2、完善的监控
3、最好有统一的运维管理系统,类似CDH那种可视化的最好
比较了好几款开源工具,要么就是半开源,要么好久没更新。最后还是选择
datasophon ,虽然也两年没更新了,但是全部开源,有问题可以二开。
况且前段时间还在研究麒麟下安装datasophon,相对熟悉点。
问题
然后开始安装时,在创建集群节点时,就遇到了创建datasophon-worker服务失败问题。

这个问题,我在适配麒麟版时也遇到过:不同 Linux 发行版对这个服务描述文件(systemd unit)的要求有些差别。
我是基于麒麟版本修改,直接让AI帮转换一下,就ok
解决
其实就改一个文件:StartWorkerHandler.java
java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasophon.api.master.handler.host;
import com.datasophon.api.load.ConfigBean;
import com.datasophon.api.utils.CommonUtils;
import com.datasophon.api.utils.MessageResolverUtils;
import com.datasophon.api.utils.MinaUtils;
import com.datasophon.api.utils.SpringTool;
import com.datasophon.common.Constants;
import com.datasophon.common.enums.InstallState;
import com.datasophon.common.model.HostInfo;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.sshd.client.session.ClientSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Date;
/**
 * Dispatcher handler that configures and starts the {@code datasophon-worker}
 * systemd service on a remote host through an SSH {@link ClientSession}.
 *
 * <p>The handler updates the worker's common properties, uploads a systemd unit
 * file (generated locally once and cached), and then reloads systemd, enables
 * and starts the service.
 *
 * <p>The generated unit file path is cached in a static field shared by all
 * instances; creation is guarded by a lock so concurrent handlers write the
 * file only once.
 */
public class StartWorkerHandler implements DispatcherWorkerHandler {

    private static final Logger logger = LoggerFactory.getLogger(StartWorkerHandler.class);

    /**
     * Content of the systemd unit uploaded to each host.
     *
     * <p>NOTE(review): the install directory ({@code /opt/datasophon}) and
     * {@code JAVA_HOME} are hard-coded here, while {@link #handle} takes the install
     * directory from {@code Constants.INSTALL_PATH} — confirm the two always agree.
     */
    private static final String SERVICE_UNIT_CONTENT = "[Unit]\n" +
            "Description=DataSophon Worker Service (Ubuntu)\n" +
            "After=network.target network-online.target\n" +
            "Wants=network-online.target\n" +
            "\n" +
            "[Service]\n" +
            "Type=forking\n" +
            "Environment=JAVA_HOME=/usr/local/jdk1.8.0_333\n" +
            "Environment=PATH=/usr/local/jdk1.8.0_333/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin\n" +
            "Environment=LC_ALL=C.UTF-8\n" +
            "WorkingDirectory=/opt/datasophon/datasophon-worker\n" +
            "ExecStart=/opt/datasophon/datasophon-worker/bin/datasophon-worker.sh start worker\n" +
            "ExecStop=/opt/datasophon/datasophon-worker/bin/datasophon-worker.sh stop worker\n" +
            "ExecReload=/opt/datasophon/datasophon-worker/bin/datasophon-worker.sh restart worker\n" +
            "User=root\n" +
            "Group=root\n" +
            "TimeoutStopSec=300\n" +
            "KillMode=process\n" +
            "Restart=on-failure\n" +
            "RestartSec=10\n" +
            "StandardOutput=journal\n" +
            "StandardError=journal\n" +
            "SyslogIdentifier=datasophon-worker\n" +
            "\n" +
            "ProtectSystem=full\n" +
            "ReadWritePaths=/opt/datasophon/datasophon-worker/logs\n" +
            "\n" +
            "[Install]\n" +
            "WantedBy=multi-user.target";

    /** Path of the locally generated unit file, shared by all handler instances. */
    private static volatile String cachedServiceFilePath = null;

    /** Guards creation of the unit file so it is written only once. */
    private static final Object lock = new Object();

    private final Integer clusterId;
    private final String clusterFrame;

    /**
     * @param clusterId    id of the cluster, passed to the remote update command
     * @param clusterFrame cluster frame name, passed to the remote update command
     */
    public StartWorkerHandler(Integer clusterId, String clusterFrame) {
        this.clusterId = clusterId;
        this.clusterFrame = clusterFrame;
    }

    /**
     * Updates the worker configuration on the remote host, installs the systemd
     * unit and starts the worker service.
     *
     * @param session  SSH session to the target host
     * @param hostInfo host record; its message, error message, progress and
     *                 create time are updated here
     * @return {@code true} when all steps were issued; {@code false} when the
     *         configuration update, the unit file creation or the unit file
     *         upload failed (the host is then marked {@code FAILED})
     * @throws UnknownHostException if the local host name cannot be resolved
     */
    @Override
    public boolean handle(ClientSession session, HostInfo hostInfo) throws UnknownHostException {
        ConfigBean configBean = SpringTool.getApplicationContext().getBean(ConfigBean.class);
        String installPath = Constants.INSTALL_PATH;
        String localHostName = InetAddress.getLocalHost().getHostName();
        String command = Constants.UPDATE_COMMON_CMD
                + localHostName
                + Constants.SPACE
                + configBean.getServerPort()
                + Constants.SPACE
                + this.clusterFrame
                + Constants.SPACE
                + this.clusterId
                + Constants.SPACE
                + installPath;

        String updateCommonPropertiesResult = MinaUtils.execCmdWithResult(session, command);
        if (StringUtils.isBlank(updateCommonPropertiesResult) || "failed".equals(updateCommonPropertiesResult)) {
            logger.error("common.properties update failed,command:{}", command);
            markFailed(hostInfo, "common.properties update failed", "modify.configuration.file.fail");
            // Report the failure like the upload-failure path below does.
            return false;
        }

        // Initialize environment: stop any previously installed worker first.
        MinaUtils.execCmdWithResult(session, "systemctl stop datasophon-worker.service");
        // NOTE(review): if each execCmdWithResult call runs in its own shell, this
        // ulimit (and the `source` further down) will not persist — confirm.
        MinaUtils.execCmdWithResult(session, "ulimit -n 102400");
        MinaUtils.execCmdWithResult(session, "sysctl -w vm.max_map_count=2000000");

        // Copy the init script; it is removed again below once the unit file is in place.
        MinaUtils.execCmdWithResult(session,
                "\\cp " + installPath + "/datasophon-worker/script/datasophon-worker /etc/init.d/");
        MinaUtils.execCmdWithResult(session, "chmod +x /etc/init.d/datasophon-worker");

        // Generate (or reuse) the local unit file. It is intentionally not deleted
        // after upload because the path is cached and reused for later hosts.
        String makeDWorkerTempFile;
        try {
            makeDWorkerTempFile = makeDWorkerServiceTempFile();
        } catch (RuntimeException e) {
            logger.error("create datasophon-worker.service temp file failed", e);
            markFailed(hostInfo, "create datasophon-worker.service temp file failed", "upload.file.fail");
            return false;
        }

        boolean uploadFile = MinaUtils.uploadFile(session, "/usr/lib/systemd/system", makeDWorkerTempFile);
        if (!uploadFile) {
            logger.error("upload datasophon-worker.service failed");
            markFailed(hostInfo, "upload datasophon-worker.service failed", "upload.file.fail");
            return false;
        }

        MinaUtils.execCmdWithResult(session,
                "\\cp " + installPath + "/datasophon-worker/script/datasophon-env.sh /etc/profile.d/");
        MinaUtils.execCmdWithResult(session, "source /etc/profile.d/datasophon-env.sh");
        hostInfo.setMessage(MessageResolverUtils.getMessage("start.host.management.agent"));

        // Remove the init script so only the systemd unit manages the worker.
        String cleanupCmd = "rm -f /etc/init.d/datasophon-worker 2>/dev/null";
        MinaUtils.execCmdWithResult(session, cleanupCmd);
        // The unit lists this directory in ReadWritePaths, so make sure it exists.
        MinaUtils.execCmdWithResult(session, "mkdir -p /opt/datasophon/datasophon-worker/logs");

        // Reload systemd, enable the service at boot, then start it.
        MinaUtils.execCmdWithResult(session, "systemctl daemon-reload");
        MinaUtils.execCmdWithResult(session, "systemctl enable datasophon-worker.service");
        MinaUtils.execCmdWithResult(session, "systemctl start datasophon-worker.service");

        hostInfo.setProgress(75);
        hostInfo.setCreateTime(new Date());
        logger.info("end dispatcher host agent :{}", hostInfo.getHostname());
        return true;
    }

    /**
     * Records a failure on the host and persists the {@code FAILED} install state.
     *
     * @param hostInfo   host record to update
     * @param errMsg     technical error message stored on the host
     * @param messageKey message key resolved through {@link MessageResolverUtils}
     */
    private static void markFailed(HostInfo hostInfo, String errMsg, String messageKey) {
        hostInfo.setErrMsg(errMsg);
        hostInfo.setMessage(MessageResolverUtils.getMessage(messageKey));
        CommonUtils.updateInstallState(InstallState.FAILED, hostInfo);
    }

    /**
     * Tells whether a cached unit file path can still be used.
     *
     * @param path cached path, may be {@code null}
     * @return {@code true} if {@code path} is non-null and still names a regular file
     */
    private static boolean isReusable(String path) {
        return path != null && Files.isRegularFile(Paths.get(path));
    }

    /**
     * Writes the systemd unit to {@code datasophon-worker.service} inside the
     * directory named by {@code java.io.tmpdir} and caches the resulting path.
     *
     * <p>The cached path is reused only while the file still exists; if it has
     * been removed, the file is written again.
     *
     * @return path of the local unit file
     * @throws RuntimeException if the temp directory is missing and cannot be
     *                          created, is not writable, or the file cannot be written
     */
    private String makeDWorkerServiceTempFile() {
        // First check without the lock: fast path once the file has been created.
        String cached = cachedServiceFilePath;
        if (isReusable(cached)) {
            logger.info("使用缓存的服务文件: {}", cached);
            return cached;
        }
        final String methodName = "makeDWorkerServiceTempFile";
        logger.info("【开始】执行 {} 方法", methodName);
        synchronized (lock) {
            // Second check under the lock: another thread may have created it meanwhile.
            cached = cachedServiceFilePath;
            if (isReusable(cached)) {
                logger.info("使用缓存的服务文件: {}", cached);
                return cached;
            }
            logger.info("服务文件内容创建完成,内容为: {} ", SERVICE_UNIT_CONTENT);
            try {
                // Resolve the temp directory.
                String tempDirProperty = System.getProperty("java.io.tmpdir");
                logger.info("1. java.io.tmpdir = '{}'", tempDirProperty);
                if (tempDirProperty == null || tempDirProperty.trim().isEmpty()) {
                    logger.error("java.io.tmpdir 系统属性为空或null");
                    throw new RuntimeException("java.io.tmpdir system property is null or empty");
                }
                Path tempDirPath = Paths.get(tempDirProperty);

                // Make sure the temp directory exists.
                if (!Files.exists(tempDirPath)) {
                    logger.info("3. 临时目录不存在,尝试创建...");
                    try {
                        Files.createDirectories(tempDirPath);
                        logger.info(" 临时目录创建成功");
                    } catch (Exception e) {
                        logger.error(" 创建临时目录失败", e);
                        throw new RuntimeException("Failed to create temp directory: " + tempDirPath, e);
                    }
                } else {
                    logger.info("3. 临时目录已存在");
                }

                // Check write permission, logging diagnostics when it is missing.
                logger.info("4. 检查目录权限...");
                boolean isWritable = Files.isWritable(tempDirPath);
                logger.info(" Files.isWritable(tempDirPath) = {}", isWritable);
                logger.info(" tempDirPath.toFile().canWrite() = {}", tempDirPath.toFile().canWrite());
                if (!isWritable) {
                    logger.error("临时目录不可写,详细诊断:");
                    logger.error(" 目录路径: {}", tempDirPath.toAbsolutePath());
                    logger.error(" 目录所有者: {}", Files.getOwner(tempDirPath));
                    logger.error(" 目录权限: {}", Files.getPosixFilePermissions(tempDirPath));
                    logger.error(" 当前用户: {}", System.getProperty("user.name"));
                    logger.error(" 当前用户主目录: {}", System.getProperty("user.home"));
                    throw new RuntimeException("No write permission to temp directory: " + tempDirPath);
                }

                // Create the unit file under a fixed name so the uploaded file
                // keeps the name datasophon-worker.service.
                logger.info("5. 创建临时文件...");
                String fileName = "datasophon-worker.service";
                Path tempFilePath = tempDirPath.resolve(fileName);
                logger.info(" 目标文件路径: {}", tempFilePath.toAbsolutePath());

                // Remove a stale file first; if that fails, the write below still
                // attempts to overwrite it.
                if (Files.exists(tempFilePath)) {
                    logger.warn(" 文件已存在,将被覆盖");
                    try {
                        Files.delete(tempFilePath);
                        logger.info(" 已删除旧文件");
                    } catch (Exception e) {
                        logger.warn(" 删除旧文件失败,继续尝试覆盖", e);
                    }
                }

                Files.write(tempFilePath, SERVICE_UNIT_CONTENT.getBytes(StandardCharsets.UTF_8));

                // Verify the file was really written.
                if (!Files.exists(tempFilePath)) {
                    logger.error("文件创建后不存在,写入可能失败");
                    throw new RuntimeException("File not created after write operation");
                }
                long fileSize = Files.size(tempFilePath);
                if (fileSize == 0) {
                    logger.error("创建的文件大小为0,内容可能未写入");
                    throw new RuntimeException("Created file is empty");
                }

                cachedServiceFilePath = tempFilePath.toString();
                logger.info("创建服务文件: {}", cachedServiceFilePath);
                return cachedServiceFilePath;
            } catch (Exception e) {
                // The logger call already records the full stack trace.
                logger.error("【失败】{} 方法执行异常", methodName, e);
                throw new RuntimeException("Failed to create service temp file: " + e.getMessage(), e);
            }
        }
    }
}
重点是:handle和makeDWorkerServiceTempFile方法。特别是makeDWorkerServiceTempFile。
增加的功能:
1. 处理并发问题:多个线程同时创建服务配置文件时,加锁保证只创建一次。
2. 支持多次重装服务:先停止原来的服务、删除原来的文件,再重新安装。
3. 增加 Ubuntu 特有的服务配置。
最后
这个问题,看结果挺简单,但是过程也有点长。
如需沟通:lita2lz