1. 解压(root@master install# tar -zxvf datax.tar.gz)
2. 清理插件操作,有助于维护datax配置整洁。
cd datax
find plugin/reader/ -type f -name "._*er" | xargs rm -rf
find plugin/writer/ -type f -name "._*er" | xargs rm -rf
3.测试运行datax自带的job.json
root@master datax# python bin/datax.py /bigdata/datax/job/job.json

4.将数据从MySQL中导入到HDFS上
源数据:MySQL 数据库 duoduo_db 中的 t_roles 表

在 /bigdata/datax 目录下新建文件:vim mysql_to_hdfs.json
文件内容如下:
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"rid",
"rname"
],
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://tonymin:3306/duoduo_db"
],
"table": [
"t_roles"
]
}
],
"password": "Mzp_2022!",
"username": "root"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{
"name": "rid",
"type": "int"
},
{
"name": "rname",
"type": "string"
}
],
"defaultFS": "hdfs://tonymin:9820",
"fieldDelimiter": "\t",
"fileName": "roles.txt",
"fileType": "text",
"path": "/datax_transfer/",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
其中 "defaultFS": "hdfs://tonymin:9820" 的地址和端口号可以通过以下命令(查看 fs.defaultFS 配置)获得:
hdfs getconf -confKey fs.defaultFS

python bin/datax.py mysql_to_hdfs.json
上面使用的 mysql_to_hdfs.json 是在模板的基础上修改得到的,模板可以通过下面的命令查看:
python bin/datax.py -r mysqlreader -w hdfswriter
5. 将数据从HDFS上迁移回MySQL
先重新命名文件(hdfswriter 写入时会在 fileName 后面追加一段随机后缀):
hdfs dfs -mv /roles.txt__4d0639d8_4341_4dc1_95e4_b770be4a946f /roles.txt
再查看roles.txt文件:
hdfs dfs -cat /roles.txt
查看模板:
python bin/datax.py -r hdfsreader -w mysqlwriter
在模板的基础上修改,保存为 hdfs_to_mysql.json,内容如下:
{
"job": {
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"column": ["*"],
"defaultFS": "hdfs://tonymin:9820",
"encoding": "UTF-8",
"fieldDelimiter": "\t",
"fileType": "text",
"path": "/roles.txt"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["rid", "rname"],
"connection": [
{
"jdbcUrl": "jdbc:mysql://tonymin:3306/duoduo_db",
"table": ["t_roles"]
}
],
"password": "Mzp_2022!",
"username": "root",
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
python bin/datax.py hdfs_to_mysql.json