Installation
```sh
tar -zxvf datax.tar.gz -C /opt/module/
```
Test
Run the built-in sample job to verify that the installation works:
```sh
python /opt/module/datax/bin/datax.py /opt/module/datax/job/job.json
```
MySQLToHDFS
Write the job configuration JSON following the official documentation (MysqlReader + HdfsWriter):
```json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"id",
"name",
"region_id",
"area_code",
"iso_code",
"iso_3166_2",
"create_time",
"operate_time"
],
"where": "id>=3",
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://hadoop102:3306/gmall?useUnicode=true&allowPublicKeyRetrieval=true&characterEncoding=utf-8"
],
"table": [
"base_province"
]
}
],
"password": "000000",
"splitPk": "",
"username": "root"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "region_id",
"type": "string"
},
{
"name": "area_code",
"type": "string"
},
{
"name": "iso_code",
"type": "string"
},
{
"name": "iso_3166_2",
"type": "string"
},
{
"name": "create_time",
"type": "string"
},
{
"name": "operate_time",
"type": "string"
}
],
"compress": "gzip",
"defaultFS": "hdfs://hadoop102:8020",
"fieldDelimiter": "\t",
"fileName": "base_province",
"fileType": "text",
"path": "/base_province",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": 1
}
}
}
}
```
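With table, column, and where configured as above, MysqlReader assembles a query equivalent to the following (illustrative):

```sql
SELECT id, name, region_id, area_code, iso_code, iso_3166_2, create_time, operate_time
FROM base_province
WHERE id >= 3;
```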
Make sure the target HDFS path exists (HdfsWriter requires it):
```sh
hadoop fs -mkdir /base_province
```
Submit the job:
```sh
cd /opt/module/datax
python bin/datax.py job/base_province.json
```
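One way to inspect the result (the output files are gzip-compressed text, so pipe them through zcat):

```sh
hadoop fs -cat /base_province/* | zcat
```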
Using SQL
Instead of table, column, and where, MysqlReader also accepts a custom querySql (when querySql is set, the table/column/where options are ignored):
```json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://hadoop102:3306/gmall?useUnicode=true&allowPublicKeyRetrieval=true&characterEncoding=utf-8"
],
"querySql": [
"select id,name,region_id,area_code,iso_code,iso_3166_2,create_time,operate_time from base_province where id>=3"
]
}
],
"password": "000000",
"username": "root"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "region_id",
"type": "string"
},
{
"name": "area_code",
"type": "string"
},
{
"name": "iso_code",
"type": "string"
},
{
"name": "iso_3166_2",
"type": "string"
},
{
"name": "create_time",
"type": "string"
},
{
"name": "operate_time",
"type": "string"
}
],
"compress": "gzip",
"defaultFS": "hdfs://hadoop102:8020",
"fieldDelimiter": "\t",
"fileName": "base_province",
"fileType": "text",
"path": "/base_province",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": 1
}
}
}
}
```
Passing a date parameter
DataX accepts parameters on the command line via -p "-Dkey=value" and substitutes them for ${key} placeholders in the job JSON:
```sh
python bin/datax.py -p"-Ddt=2022-06-08" job/base_province.json
```
```json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://hadoop102:3306/gmall?useUnicode=true&allowPublicKeyRetrieval=true&characterEncoding=utf-8"
],
"querySql": [
"select id,name,region_id,area_code,iso_code,iso_3166_2,create_time,operate_time from base_province where id>=3"
]
}
],
"password": "000000",
"username": "root"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "region_id",
"type": "string"
},
{
"name": "area_code",
"type": "string"
},
{
"name": "iso_code",
"type": "string"
},
{
"name": "iso_3166_2",
"type": "string"
},
{
"name": "create_time",
"type": "string"
},
{
"name": "operate_time",
"type": "string"
}
],
"compress": "gzip",
"defaultFS": "hdfs://hadoop102:8020",
"fieldDelimiter": "\t",
"fileName": "base_province",
"fileType": "text",
"path": "/base_province/${dt}",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": 1
}
}
}
}
```
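As before, the target path must exist before the job runs, so create the dated directory first (example for dt=2022-06-08):

```sh
hadoop fs -mkdir -p /base_province/2022-06-08
```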
HDFSToMySQL
Write the job configuration (HdfsReader + MysqlWriter); a column value of "*" reads every field in order:
```json
{
"job": {
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"defaultFS": "hdfs://hadoop102:8020",
"path": "/base_province",
"column": [
"*"
],
"fileType": "text",
"compress": "gzip",
"encoding": "UTF-8",
"nullFormat": "\\N",
"fieldDelimiter": "\t"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"username": "root",
"password": "000000",
"connection": [
{
"table": [
"test_province"
],
"jdbcUrl": "jdbc:mysql://hadoop102:3306/gmall?useUnicode=true&allowPublicKeyRetrieval=true&characterEncoding=utf-8"
}
],
"column": [
"id",
"name",
"region_id",
"area_code",
"iso_code",
"iso_3166_2",
"create_time",
"operate_time"
],
"writeMode": "replace"
}
}
}
],
"setting": {
"speed": {
"channel": 1
}
}
}
}
```
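Note the writeMode: with "replace", MysqlWriter issues REPLACE INTO statements, so a row whose primary key already exists is overwritten instead of raising a duplicate-key error; conceptually:

```sql
-- illustrative values only
REPLACE INTO test_province (id, name, region_id, area_code, iso_code, iso_3166_2, create_time, operate_time)
VALUES (3, 'Shanxi', '1', '140000', 'CN-14', 'CN-SX', '2021-06-08 00:00:00', NULL);
```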
Create the gmall.test_province table in MySQL:
```sql
DROP TABLE IF EXISTS `test_province`;
CREATE TABLE `test_province` (
  `id` BIGINT(20) NOT NULL,
  `name` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `region_id` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `area_code` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `iso_code` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `iso_3166_2` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `create_time` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  `operate_time` VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;
```
Submit the job:
```sh
python bin/datax.py job/test_province.json
```
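One way to verify the import (credentials match the job above):

```sh
mysql -uroot -p000000 -e 'SELECT COUNT(*) FROM gmall.test_province;'
```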
DataX config generator
Much like a code generator, it produces the corresponding DataX job JSON from table metadata.
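A minimal sketch of what such a generator might look like, reusing the host, credentials, and writer defaults from the jobs above (the function name and structure here are hypothetical, not the real tool's API):

```python
import json

def gen_mysql_to_hdfs(table, columns, db="gmall"):
    """Build a MySQLToHDFS DataX job dict for one table (hypothetical helper)."""
    jdbc = (f"jdbc:mysql://hadoop102:3306/{db}"
            "?useUnicode=true&allowPublicKeyRetrieval=true&characterEncoding=utf-8")
    return {
        "job": {
            "content": [{
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "column": columns,
                        "connection": [{"jdbcUrl": [jdbc], "table": [table]}],
                        "username": "root",
                        "password": "000000",
                    },
                },
                "writer": {
                    "name": "hdfswriter",
                    "parameter": {
                        # write every field as string for simplicity
                        # (the hand-written jobs above use bigint for id)
                        "column": [{"name": c, "type": "string"} for c in columns],
                        "compress": "gzip",
                        "defaultFS": "hdfs://hadoop102:8020",
                        "fieldDelimiter": "\t",
                        "fileName": table,
                        "fileType": "text",
                        "path": f"/{table}",
                        "writeMode": "append",
                    },
                },
            }],
            "setting": {"speed": {"channel": 1}},
        }
    }

if __name__ == "__main__":
    cfg = gen_mysql_to_hdfs("base_province", [
        "id", "name", "region_id", "area_code",
        "iso_code", "iso_3166_2", "create_time", "operate_time",
    ])
    print(json.dumps(cfg, ensure_ascii=False, indent=2))  # redirect into job/*.json
```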