一、安装注意事项以及初识JAVA代码组件
java
JAVA组件一些小技巧
1、JAVA组件get和set值的方法
// Grow the row array so every declared output field has a slot.
r = createOutputRow(r, data.outputRowMeta.size());

// Read the incoming "uuid" field as a String.
String uuid = this.get(Fields.In, "uuid").getString(r);

// A missing/empty id becomes a placeholder; a real id gets the "_juxin" suffix.
boolean uuidMissing = (uuid == null) || uuid.isEmpty();
uuid = uuidMissing ? "xxxxxxx" : uuid + "_juxin";

// Write the value. Note: the output field "code" (type String) must be declared
// in the Fields grid at the bottom-left of the step dialog.
this.get(Fields.Out, "code").setValue(r, uuid);
putRow(data.outputRowMeta, r);
2、格式化java代码 https://formatjava.github.net.cn/
3、基础训练题
import cn.hutool.core.date.DateUtil;
import cn.hutool.json.JSONObject;
import java.util.Date;
import java.math.BigDecimal;
import java.math.RoundingMode;
/**
 * Row handler for the basic exercise.
 *
 * <p>Scenario: the input stream carries {@code order_id} (string) and {@code amount}
 * (numeric). This step adds three output fields:
 * <ul>
 *   <li>{@code process_time}: current time formatted as yyyy-MM-dd HH:mm:ss</li>
 *   <li>{@code amount_double}: {@code amount} as a Double rounded half-up to 2 decimals</li>
 *   <li>{@code is_large}: "是" when amount_double is greater than 1000, otherwise "否"</li>
 * </ul>
 * Input is read with {@code get(Fields.In, ...)} and output written with
 * {@code get(Fields.Out, ...).setValue(...)}.
 *
 * @return false once the input is exhausted, true after a row has been forwarded
 * @throws KettleException if {@code amount} is empty or is not a valid decimal number
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
{
    if (first)
    {
        first = false;
        /* TODO: Your code here. (Using info fields)
        FieldHelper infoField = get(Fields.Info, "info_field_name");
        RowSet infoStream = findInfoRowSet("info_stream_tag");
        Object[] infoRow = null;
        int infoRowCount = 0;
        // Read all rows from info step before calling getRow() method, which returns first row from any
        // input rowset. As rowMeta for info and input steps varies getRow() can lead to errors.
        while((infoRow = getRowFrom(infoStream)) != null){
        // do something with info data
        infoRowCount++;
        }
        */
    }

    Object[] r = getRow();
    if (r == null)
    {
        setOutputDone();
        return false;
    }

    // It is always safest to call createOutputRow() to ensure that your output row's Object[] is large
    // enough to handle any new fields you are creating in this step.
    r = createOutputRow(r, data.outputRowMeta.size());

    // Read the amount as text and fail with a message that names the field, instead of
    // letting BigDecimal throw a bare NullPointerException / NumberFormatException.
    String amountText = get(Fields.In, "amount").getString(r);
    if (amountText == null || amountText.trim().isEmpty())
    {
        throw new KettleException("Input field 'amount' is empty; cannot compute amount_double");
    }

    // Convert and keep two decimals (round half up).
    Double result;
    try
    {
        result = Double.valueOf(
            new BigDecimal(amountText.trim()).setScale(2, RoundingMode.HALF_UP).doubleValue());
    }
    catch (NumberFormatException e)
    {
        throw new KettleException("Input field 'amount' is not a valid number: " + amountText, e);
    }
    logBasic("amount_double " + result);

    String is_large = result > 1000.00 ? "是" : "否";

    // process_time is written exactly once (the original set it twice).
    Date now = new Date();
    this.get(Fields.Out, "process_time").setValue(r, DateUtil.format(now, "yyyy-MM-dd HH:mm:ss"));
    this.get(Fields.Out, "amount_double").setValue(r, result);
    this.get(Fields.Out, "is_large").setValue(r, is_large);

    // Send the row on to the next step.
    putRow(data.outputRowMeta, r);
    return true;
}
二、JAVA代码组件 函数方法以及try catch使用
测试题 2:字段条件计算
场景:输入包含 birth_date(日期字符串,格式:yyyy-MM-dd),要求计算:
age:根据 birth_date 计算年龄(取整数)
age_group:根据年龄分组(<18:少年;18-60:成年;>60:老年)
is_birthday:如果今天是用户生日(月和日匹配),则为 "Y",否则为 "N"
提示:可使用 Hutool 的 DateUtil 处理日期(需导入:import cn.hutool.core.date.DateUtil;)
java
import cn.hutool.core.date.DateUtil;
import java.util.Date;
/**
 * Row handler for the age exercise: derives {@code age}, {@code age_group} and
 * {@code is_birthday} from the input birth date and forwards the row.
 *
 * <p>Any failure while handling a row is logged and the row is still forwarded,
 * with the three output fields left unset.
 *
 * <p>NOTE(review): the exercise text calls the input field {@code birth_date} but this
 * code reads {@code birthDate}; confirm the name matches the incoming stream.
 *
 * @return false once the input is exhausted, true after a row has been forwarded
 * @throws KettleException declared by the step contract
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    if (first) {
        first = false;
        // Initialization code (if needed)
    }

    Object[] r = getRow();
    if (r == null) {
        setOutputDone();
        return false;
    }

    // Create the output row
    r = createOutputRow(r, data.outputRowMeta.size());

    try {
        // Read the birth date input field
        String birthDate = this.get(Fields.In, "birthDate").getString(r);

        // Compute the age, its group and the birthday flag
        int age = calculateAge(birthDate);
        String ageGroup = determineAgeGroup(age);
        String isBirthday = isTodayBirthday(birthDate) ? "Y" : "N";

        // Kettle stores Integer-typed fields as java.lang.Long; writing a boxed
        // java.lang.Integer makes later steps fail with a ClassCastException.
        this.get(Fields.Out, "age").setValue(r, Long.valueOf(age));
        this.get(Fields.Out, "age_group").setValue(r, ageGroup);
        this.get(Fields.Out, "is_birthday").setValue(r, isBirthday);

        logDebug("处理出生日期: " + birthDate + ", 年龄: " + age + ", 分组: " + ageGroup + ", 是否生日: " + isBirthday);
    } catch (Exception e) {
        // Two-argument logError keeps the stack trace; logError(String) is the single-argument alternative.
        logError("处理出生日期时发生错误: " + e.getMessage(), e);
    }

    // Pass the processed row to the next step
    putRow(data.outputRowMeta, r);
    return true;
}
/**
 * Computes the age in completed years as of today.
 *
 * @param birthDate birth date text, expected as yyyy-MM-dd
 * @return the year difference to today, minus one when this year's birthday has not happened yet
 * @throws IllegalArgumentException if isValidDateFormat rejects {@code birthDate}
 */
private int calculateAge(String birthDate) {
    if (!isValidDateFormat(birthDate)) {
        throw new IllegalArgumentException("出生日期格式不正确,应为yyyy-MM-dd: " + birthDate);
    }

    String[] ymd = birthDate.split("-");
    int bornYear = Integer.parseInt(ymd[0]);
    int bornMonth = Integer.parseInt(ymd[1]);
    int bornDay = Integer.parseInt(ymd[2]);

    Date today = new Date();
    int yearNow = DateUtil.year(today);
    int monthNow = DateUtil.month(today) + 1; // the source adds 1 because Hutool's month is 0-based
    int dayNow = DateUtil.dayOfMonth(today);

    // The birthday is still ahead if we are in an earlier month, or the same month on an earlier day.
    boolean birthdayStillAhead =
        monthNow < bornMonth || (monthNow == bornMonth && dayNow < bornDay);

    int years = yearNow - bornYear;
    return birthdayStillAhead ? years - 1 : years;
}
/**
 * Maps an age to its group label.
 *
 * @param age age in years
 * @return "少年" below 18, "成年" from 18 to 60 inclusive, "老年" above 60
 */
private String determineAgeGroup(int age) {
    if (age > 60) {
        return "老年";
    }
    return age >= 18 ? "成年" : "少年";
}
/**
 * Tells whether today's month and day match those of the birth date.
 *
 * @param birthDate birth date text, expected as yyyy-MM-dd
 * @return true when the MM-dd part equals today's MM-dd; false otherwise,
 *         including when isValidDateFormat rejects the input
 */
private boolean isTodayBirthday(String birthDate) {
    if (isValidDateFormat(birthDate)) {
        String[] ymd = birthDate.split("-");
        String bornMonthDay = ymd[1] + "-" + ymd[2];
        return DateUtil.format(new Date(), "MM-dd").equals(bornMonthDay);
    }
    return false;
}
/**
 * Checks that a string has exactly the shape yyyy-MM-dd: four digits, a dash,
 * two digits, a dash, two digits, and nothing else.
 *
 * <p>Only the shape is checked; month and day ranges are not validated.
 * The previous length-only check accepted non-numeric text such as "abcd-ef-gh"
 * and strings with trailing dashes such as "2000-01-02-" (String.split drops
 * trailing empty segments), which then broke the numeric parsing in callers.
 *
 * @param date date string to check, may be null
 * @return true if the string matches the pattern, false otherwise (including null or blank)
 */
private boolean isValidDateFormat(String date) {
    if (date == null) {
        return false;
    }
    return date.matches("\\d{4}-\\d{2}-\\d{2}");
}
三、JAVA代码组件 多字段拼接与转换
测试题 3:多字段拼接与转换
场景:输入包含 first_name、last_name(字符串)、phone(字符串,如:13800138000),要求:
full_name:拼接 last_name + "・" + first_name(如:张・三)
phone_masked:手机号中间 4 位替换为 *(如:138****8000)
name_length:full_name 的字符长度(含中间的 "・")
java
/**
 * Row handler for the name/phone exercise: builds the full name, the masked phone
 * number and the name length from the input fields and forwards the row.
 *
 * <p>Any failure while handling a row is logged and the row is still forwarded,
 * with the three output fields left unset.
 *
 * <p>NOTE(review): the exercise text uses snake_case names (first_name, full_name, ...)
 * while this code uses camelCase field names; confirm they match the step configuration.
 *
 * @return false once the input is exhausted, true after a row has been forwarded
 * @throws KettleException declared by the step contract
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
{
    if (first)
    {
        first = false;
        // Initialization code (if needed)
    }

    Object[] r = getRow();
    if (r == null)
    {
        setOutputDone();
        return false;
    }

    // Create the output row
    r = createOutputRow(r, data.outputRowMeta.size());

    try
    {
        // Read the name and phone input fields
        String firstName = this.get(Fields.In, "firstName").getString(r);
        String lastName = this.get(Fields.In, "lastName").getString(r);
        String phone = this.get(Fields.In, "phone").getString(r);

        // results = [fullName, phoneMasked, nameLength]
        String[] results = processFields(firstName, lastName, phone);
        logDebug("full_name: " + results[0]); // e.g. 张・三
        logDebug("phone_masked: " + results[1]); // e.g. 138****8000
        logDebug("name_length: " + results[2]); // e.g. 3

        // Set the output field values
        this.get(Fields.Out, "fullName").setValue(r, results[0]);
        this.get(Fields.Out, "phoneMasked").setValue(r, results[1]);
        this.get(Fields.Out, "nameLength").setValue(r, results[2]);
    }
    catch (Exception e)
    {
        // The message used to say "birth date" (copied from another exercise); it now
        // names the fields this step actually processes. The two-argument logError
        // keeps the stack trace.
        logError("处理姓名和手机号字段时发生错误: " + e.getMessage(), e);
    }

    // Pass the processed row to the next step
    putRow(data.outputRowMeta, r);
    return true;
}
/**
 * Derives the three output values of the name/phone exercise.
 *
 * @param firstName given name; null is treated as an empty string
 * @param lastName family name; null is treated as an empty string
 * @param phone phone number, passed to maskPhone
 * @return a three-element array: [fullName, phoneMasked, nameLength as text]
 */
private static String[] processFields(String firstName, String lastName, String phone)
{
    String family = (lastName == null) ? "" : lastName;
    String given = (firstName == null) ? "" : firstName;

    // full_name is lastName + "・" + firstName
    String joined = family + "・" + given;

    String[] out = new String[3];
    out[0] = joined;
    out[1] = maskPhone(phone);
    // name_length is the character count of full_name, separator included
    out[2] = String.valueOf(joined.length());
    return out;
}
/**
 * Replaces the middle four digits of a phone number with "****".
 *
 * @param phone raw phone number, may be null
 * @return "" for null or blank input; the trimmed number with characters 4-7 masked
 *         when it is exactly 11 digits; otherwise the input unchanged (not trimmed)
 */
private static String maskPhone(String phone)
{
    if (phone == null)
    {
        return "";
    }

    String digits = phone.trim();
    if (digits.isEmpty())
    {
        return "";
    }

    // Only 11-digit numbers are masked; anything else is returned as given.
    boolean elevenDigits = digits.matches("\\d{11}");
    if (!elevenDigits)
    {
        return phone;
    }
    return digits.substring(0, 3) + "****" + digits.substring(7);
}
四、mysql查询指南
bash
### PDI-CE-9.4.0 中 MySQL 数据处理操作说明
#### 1. Database join 动态传参查询
- **参数替换**:SQL 语句中使用 `?` 作为动态参数占位符
- **参数配置**:在 "the parameters to use" 中设置 `?` 对应的实际参数及数据类型
#### 2. 多表输入数据合并(以铜陵精讯收发存报表为例)
**场景**:合并入库、出库、库存等多表数据
**步骤**:
1. 多个表输入分别获取各数据源数据
2. 每个表输入后添加「排序记录」步骤,统一按 `area`、`wareType`、`warehouseName` 升序排序
3. 使用「Merge join」步骤,通过 `area`、`wareType`、`warehouseName` 作为连接字段合并数据
#### 3. 一次性获取 JSON 数组数据(替代逐条获取)
**SQL 示例**:
通过 `JSON_ARRAYAGG` 和 `JSON_OBJECT` 函数将查询结果聚合为 JSON 数组:
```sql
-- Aggregates every row of the subquery into one JSON array, returned as the
-- single column merged_result; each array element is one JSON object per row.
SELECT JSON_ARRAYAGG(
JSON_OBJECT(
'wareType', wareType,
'warehouseName', warehouseName,
'area', area,
'produceInroom', produceInroom
)
) AS merged_result
FROM (
-- Subquery: production inbound data for one day
-- (the date range is hard-coded to 2025-09-17 in this example)
SELECT
ps.in_type AS wareType,
wh.name AS warehouseName,
-- Maps type 0 / 1 to the area labels; any other value is passed through unchanged
CASE ps.type WHEN 0 THEN '本部' WHEN 1 THEN '新区' ELSE ps.type END AS area,
-- Total weight per group, 0 when the sum is NULL
IFNULL(SUM(ps.total_weight), 0) AS produceInroom
FROM jx_cloud_wms.jx_wms_product_in_room_list ps
-- The warehouse is matched by either its F_Id or its code, within the same type
LEFT JOIN jx_cloud_wms.jx_wms_basedata_product_warehouse wh
ON (wh.F_Id = ps.product_warehouse_id OR wh.code = ps.product_warehouse_id)
AND wh.F_DeleteMark = 0
AND wh.type = ps.type
WHERE ps.F_DeleteMark = 0
AND ps.F_EnabledMark = 1
-- Half-open range: from 00:00:00 of the day up to, but excluding, the next day
AND ps.F_CreateDate >= '2025-09-17 00:00:00'
AND ps.F_CreateDate < '2025-09-18 00:00:00'
GROUP BY ps.type, ps.in_type, wh.name
) AS subquery;
```
**说明**:查询结果直接返回包含所有记录的 JSON 数组(字段包含 `wareType`、`warehouseName` 等),避免逐条处理数据。
五、javascript脚本 -- ES5语法的函数
bash
【javascript】组件注意事项
1、Kettle的JavaScript引擎基于Rhino(Java实现的JS引擎),不支持ES6+新语法(如箭头函数=>、let/const、模板字符串等)。
2、在JavaScript步骤中,所有输出字段必须在"Fields"标签页显式声明,否则会报错
3、demo 搞一个测试数据,合并成品1库和区域同名的线类型
/**
 * Merges records from several data sources by wareType, area and warehouseName.
 *
 * Every argument that is an array is scanned in order; non-array arguments are
 * ignored. Records sharing the same wareType/area/warehouseName are combined
 * into one object, with later records overriding properties of earlier ones.
 * The returned objects are copies, never the input objects themselves.
 *
 * @param {...Array} dataSources any number of record arrays (read via arguments)
 * @returns {Array} one merged record per distinct key, in first-seen order
 */
function mergData() {
    // Copies the own enumerable properties of source onto target.
    function copyOwn(target, source) {
        for (var name in source) {
            if (source.hasOwnProperty(name)) {
                target[name] = source[name];
            }
        }
        return target;
    }

    // Plain object used as a key -> record map (ES5-compatible stand-in for Map).
    var byKey = {};

    for (var srcIdx = 0; srcIdx < arguments.length; srcIdx++) {
        var source = arguments[srcIdx];
        // Skip anything that is not an array.
        if (Object.prototype.toString.call(source) !== '[object Array]') {
            continue;
        }

        for (var rowIdx = 0; rowIdx < source.length; rowIdx++) {
            var record = source[rowIdx];
            // Composite key built by plain string concatenation (no template strings in ES5).
            var recordKey = record.wareType + '-' + record.area + '-' + record.warehouseName;

            // Always store a fresh object: start from the existing entry (if any),
            // then lay the new record on top so its values win.
            var combined = {};
            if (byKey.hasOwnProperty(recordKey)) {
                copyOwn(combined, byKey[recordKey]);
            }
            byKey[recordKey] = copyOwn(combined, record);
        }
    }

    // Collect the map values into an array.
    var merged = [];
    for (var k in byKey) {
        if (byKey.hasOwnProperty(k)) {
            merged.push(byKey[k]);
        }
    }
    return merged;
}
/**
 * Folds the "成品1库" record and the record whose warehouseName equals its area
 * into a single record per wareType/area pair, summing their numeric properties.
 *
 * Records from any other warehouse are passed through untouched. The first
 * qualifying record of a pair is copied (numeric-looking values converted to
 * numbers); a later record is added into it only when it is the counterpart kind
 * (area-named vs. 成品1库), otherwise it is passed through as-is. Pass-through
 * records come first in the result, followed by the folded records.
 *
 * @param {Array} data records carrying wareType, area and warehouseName
 * @returns {Array} pass-through records followed by the folded records
 */
function filterData(data) {
    "use strict";
    // Identity fields are never summed.
    var identityFields = ['wareType', 'area', 'warehouseName'];
    var folded = {};
    var output = [];
    var idx, row, pairKey, fromProduct1, fromAreaNamed, target, name, raw, mergeable;

    for (idx = 0; idx < data.length; idx++) {
        row = data[idx];
        pairKey = row.wareType + "-" + row.area;
        fromProduct1 = (row.warehouseName === '成品1库');
        fromAreaNamed = (row.warehouseName === row.area);

        // Other warehouses are not part of the folding.
        if (!fromProduct1 && !fromAreaNamed) {
            output.push(row);
            continue;
        }

        target = folded[pairKey];

        // First qualifying record of this pair: copy it, converting numeric-looking values.
        if (!target) {
            target = {};
            for (name in row) {
                if (row.hasOwnProperty(name)) {
                    raw = row[name];
                    if (!isNaN(raw) && raw !== null && raw !== '') {
                        target[name] = Number(raw);
                    } else {
                        target[name] = raw;
                    }
                }
            }
            // Temporary markers recording which kind of record seeded the entry.
            target._hasProduct1 = fromProduct1;
            target._hasAreaWarehouse = fromAreaNamed;
            folded[pairKey] = target;
            continue;
        }

        // Only the counterpart kind may be added into the seeded entry.
        mergeable = (target._hasProduct1 && fromAreaNamed) ||
            (target._hasAreaWarehouse && fromProduct1);
        if (!mergeable) {
            output.push(row);
            continue;
        }

        for (name in row) {
            if (!row.hasOwnProperty(name)) {
                continue;
            }
            // indexOf instead of includes(): ES5 has no Array.prototype.includes.
            if (identityFields.indexOf(name) !== -1) {
                continue;
            }
            if (isNaN(row[name])) {
                continue;
            }
            target[name] = (target[name] || 0) + Number(row[name]);
        }
    }

    // Strip the temporary markers and append the folded records.
    for (var foldedKey in folded) {
        if (folded.hasOwnProperty(foldedKey)) {
            var done = folded[foldedKey];
            delete done._hasProduct1;
            delete done._hasAreaWarehouse;
            output.push(done);
        }
    }
    return output;
}
/**
 * Appends subtotal and grand-total rows to a list of records.
 *
 * Every own property of each record other than wareType, area and warehouseName
 * whose value passes the isNaN test is added to the grand total and, when the
 * record's area is '本部' or '新区', to that area's subtotal.
 *
 * @param {Array} data records to total; the array itself is not modified
 * @returns {Array} a new array: the original records, then the 新区 subtotal,
 *                  the 本部 subtotal and the grand total, in that order
 */
function mergeAndSumWithTotals(data) {
    // Builds a totals row whose counters all start at zero.
    function zeroedTotals(areaLabel, rowLabel) {
        return {
            area: areaLabel,
            warehouseName: rowLabel,
            wareType: rowLabel,
            produceInroom: 0,
            returnInroom: 0,
            otherInroom: 0,
            buyInroom: 0,
            transInroom: 0,
            inroom: 0,
            shipOutroom: 0,
            otherOutroom: 0,
            transOutroom: 0
        };
    }

    var grandTotal = zeroedTotals("总计", "总计");
    var newAreaTotal = zeroedTotals("新区", "合计");
    var headOfficeTotal = zeroedTotals("本部", "合计");

    var rowIdx, row, field, amount;
    for (rowIdx = 0; rowIdx < data.length; rowIdx++) {
        row = data[rowIdx];
        for (field in row) {
            if (!row.hasOwnProperty(field)) {
                continue;
            }
            // Identity fields are labels, not amounts.
            if (field === "wareType" || field === "area" || field === "warehouseName") {
                continue;
            }
            if (isNaN(row[field])) {
                continue;
            }
            amount = Number(row[field]);

            // Area subtotal first, then the grand total.
            if (row.area === '本部') {
                headOfficeTotal[field] = (headOfficeTotal[field] || 0) + amount;
            } else if (row.area === '新区') {
                newAreaTotal[field] = (newAreaTotal[field] || 0) + amount;
            }
            grandTotal[field] = (grandTotal[field] || 0) + amount;
        }
    }

    // Shallow-copy the input (no spread operator in ES5), then append the totals.
    var newData = [];
    for (var copyIdx = 0; copyIdx < data.length; copyIdx++) {
        newData.push(data[copyIdx]);
    }
    newData.push(newAreaTotal, headOfficeTotal, grandTotal);
    return newData;
}
// Sample data for the demo. The stock source below is left disabled.
// var stock = [{ "area": "本部", "stock": 40.9, "wareType": "扁线", "warehouseName": "成品1库" }, { "area": "本部", "stock": 12.1, "wareType": "扁线", "warehouseName": "本部" }];

// Shipment outbound records.
var shipOutroom = [
    {
        "wareType": "铝线",
        "area": "本部",
        "warehouseName": "成品1库",
        "shipOutroom": 40.9
    },
    {
        "wareType": "铝线",
        "area": "本部",
        "warehouseName": "本部",
        "shipOutroom": 12.1
    },
    {
        "wareType": "扁线",
        "area": "新区",
        "warehouseName": "成品1库",
        "shipOutroom": 25.3
    }
];

// Inbound records.
var inroom = [
    {
        "wareType": "铝线",
        "area": "本部",
        "warehouseName": "成品1库",
        "inroom": 40.9
    }
];

// Production inbound records.
var produceInroom = [
    {
        "wareType": "铝线",
        "area": "本部",
        "warehouseName": "本部",
        "produceInroom": 40.9
    }
];

// Pipeline: merge the sources, fold 成品1库 with the area-named warehouse,
// append subtotals/grand total, then serialize to a JSON string.
var result = JSON.stringify(
    mergeAndSumWithTotals(
        filterData(
            mergData(shipOutroom, inroom, produceInroom)
        )
    )
);
六、js脚本批量存数据到mysql中
java
在一个js组件里面设置
// Build the query text and publish it as the Kettle variable "sql".
// NOTE(review): the third argument "r" is presumably the variable scope; confirm
// against the Kettle JavaScript step documentation.
var sqlQuery = "SELECT * FROM jx_wms_test";
setVariable("sql", sqlQuery, "r");
在下一个js组件里面获取
// Read the variable "sql" back in a later JavaScript step.
var val = getVariable("sql", "r");
// Note: the second argument is the default value, used when the variable does not exist
// Show the value that was read.
Alert("获取值: " + val);
之前一直不行的原因,居然是kettle的缓存,我重启电脑以后问题全部解决
建议定期清理 C:\Users\admin\.kettle 文件夹:有时即使刚重启电脑,kettle 文件保存以后运行的仍然是上一个版本的代码结果,很奇葩
三、demo
1、js代码里面定义sql
/**
 * Formats a Date as "yyyy-MM-dd HH:mm:ss" in the local time zone.
 *
 * The previous implementation sliced toISOString(), which is always UTC: a date
 * set to local midnight in a zone ahead of UTC (e.g. UTC+8) was printed as the
 * previous calendar day, so date strings derived from it were off by one day.
 *
 * @param {Date} date the date to format
 * @returns {string} the local date and time, zero-padded
 */
function format(date) {
    // Left-pads a number below 10 with a single zero.
    function pad2(n) {
        return (n < 10 ? '0' : '') + n;
    }
    return date.getFullYear() + '-' + pad2(date.getMonth() + 1) + '-' + pad2(date.getDate()) +
        ' ' + pad2(date.getHours()) + ':' + pad2(date.getMinutes()) + ':' + pad2(date.getSeconds());
}
// Returns a copy of base moved back by daysBack days (same time of day).
function startOfDayOffset(base, daysBack) {
    var shifted = new Date(base);
    shifted.setDate(base.getDate() - daysBack);
    return shifted;
}

// Current time.
var now = new Date();

// Local midnight (00:00:00) of today.
var todayStart = new Date(now);
todayStart.setHours(0, 0, 0, 0);

// Local midnight of yesterday and of the day before yesterday.
var yesterdayStart = startOfDayOffset(todayStart, 1);
var dayBeforeYesterdayStart = startOfDayOffset(todayStart, 2);

// Date-only parts of the formatted values, plus the formatted current time.
var dateStr = format(yesterdayStart).split(" ")[0];
var beforDay = format(dayBeforeYesterdayStart).split(" ")[0];
var currentTime = format(now);

// Production inbound SQL, kept on a single line to avoid line-break problems.
// It filters F_CreateDate by the beforDay prefix.
var t1 = "SELECT ps.in_type as wareType,wh.name as warehouseName,CASE ps.type WHEN 0 THEN '本部' WHEN 1 THEN '新区' ELSE ps.type END AS area, ifnull(sum(ps.total_weight), 0) as produceInroom FROM jx_cloud_wms.jx_wms_product_in_room_list ps LEFT JOIN jx_cloud_wms.jx_wms_basedata_product_warehouse wh ON(wh.F_Id = ps.product_warehouse_id OR wh.code = ps.product_warehouse_id) AND wh.F_DeleteMark = 0 AND wh.type = ps.type WHERE ps.F_DeleteMark = 0 AND ps.F_EnabledMark = 1 AND ps.F_CreateDate like '"+beforDay+"%' GROUP BY ps.type, ps.in_type, wh.name";
在下方的输出字段中声明 t1,类型为字符串(String)
2、设置变量组件: 字段名称是t1 变量名是SSW
3、表输入组件 SQL: ${SSW} 勾选 替换SQL语句里的变量
4、验证成功