argo 使用yml配置编排任务

前端使用一个可以拖拽的编排工具，每个节点会有镜像和启动命令参数的配置

定义一个dag模板

复制代码

apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  name: ml-{{randomNumber}}
  generateName: dag-diamond-{{randomNumber}}
spec:
  entrypoint: main
  templates:
  - name: main
    dag:
      tasks:
      {% for item in task_arr %}
      - name: {{item.name}}
        template: {{item.template}}
        dependencies: {{item.dependencies}}
        arguments:
          parameters: {{item.parameters}}
      {% endfor %}
  {% for item in task_templates %}
  - name: {{item.name}}
    inputs:
      parameters:
      {% for parameter in item.parameters %}
      - name: {{parameter}}
      {% endfor %}
    container:
      name: {{item.name}}
      image: {{item.image}}
      command: {{item.command}}
  {% endfor %}

编写代码把前端接收到的图信息写入到dag-xxx.yml

复制代码

def uploadArgoYml(etl_pipeline_id, dag_json):

    s3Host = conf.get('MINIO_HOST', '')
    s3AccessKey = conf.get('MINIO_ACCESS_KEY', '')
    s3SecretKey = conf.get('MINIO_SECRET_KEY', '')

    if "http://" in s3Host or "https://" in s3Host:
        s3Host = s3Host.split("//", 1)[-1]

        client = Minio(
            s3Host,
            access_key=s3AccessKey,
            secret_key=s3SecretKey,
            secure=False
        )
        S3_file_path = None
        # 使用 task_templates + 参数作为启动
        task_arr = []
        # 增加启动镜像+command作为模板
        task_templates = []
        try:
            from jinja2 import Template
            from pinyin import pinyin
            import yaml
            print(f'current_directory:{current_directory}')
            script_directory = os.path.dirname(os.path.realpath(__file__))

            with open(f'{script_directory}/../templates/pipeline_task/dag_template.yml') as rf:
                template_content = rf.read()

            template = Template(template_content)
            # 第一次遍历先添加template
            for key, value in dag_json.items():
                templateKey = pinyin.get(key, format="strip", delimiter='')
                notebok_image_name = value.get('task-config').get('image_id')
                if notebok_image_name is not None:
                    notebook = db.session.query(Notebook).filter_by(name=notebok_image_name).first()
                    image = eval(notebook.image_save_path)[0]
                else:
                    image = 'ray:test'
                command = value.get('task-config').get('command', '')
                parameters = value.get('task-config').get('parameters', '')
                task_templates.append({
                    'name': templateKey,
                    'image': image,
                    'command': command.split(" "),
                    'parameters': ['params']
                })
                task_config = value['task-config']  # 获取字典对象
                dependencies = []
                for item in value['upstream']:
                    label = pinyin.get(item, format="strip")
                    for k, v in dag_json.items():
                        if pinyin.get(k, format="strip", delimiter='') == label:
                            dependencies.append(f'task{v.get("task_id")}')

                parameters = [{'name': key, 'value': value} for key, value in task_config.items()]
                task_arr.append({
                    "name": f'"task{value.get("task_id")}"',
                    "template": templateKey,
                    "dependencies": dependencies,
                    "parameters": [{'name': 'params', 'value': parameters}]
                })
            import random
            import string

            def generate_random_string():
                # 包含小写字母和数字的字符串
                characters = string.ascii_lowercase + string.digits
                random_string = ''.join(random.choice(characters) for _ in range(8))  # 生成8个字符的随机字符串
                # 确保名称以字母或数字开头和结尾
                random_string = random.choice(string.ascii_lowercase + string.digits) + random_string[1:]
                random_string = random_string[:-1] + random.choice(string.ascii_lowercase + string.digits)
                return random_string

            variables = {
                'task_arr': task_arr,
                'task_templates': task_templates,
                'etl_pipeline_id': etl_pipeline_id,
                'randomNumber': generate_random_string()
            }
            rendered_content = template.render(variables)

            local_file_path = f'{script_directory}/../templates/pipeline_task/dag_pipeline-{etl_pipeline_id}.yml'
            with open(local_file_path, 'w') as wf:
                wf.write('\n'.join(line for line in rendered_content.split('\n') if line.strip()))

            bucket_name = "xxxx"
            dir_path = 'PIPELINE_TASK'
            S3_file_path = f'{dir_path}/{etl_pipeline_id}/task-{etl_pipeline_id}.yml'
            print(f'S3_file_path:{S3_file_path}')
            file_length = os.stat(local_file_path).st_size
            client.put_object(bucket_name, S3_file_path, open(local_file_path, 'rb'), length=file_length)

        except Exception as err:
            print('Error occurred:', err)

    else:
        print('No s3Host parameter value provided')

    return S3_file_path

提交到argo

复制代码

    def submit_pipeline(self, etl_pipeline_id):
        # 从minio上下载dag.yml 转为
        from pathlib import Path
        import os
        import yaml
        argo_server_url = conf.get('ARGO_SERVER', '')
        script_directory = os.path.dirname(os.path.realpath(__file__))
        local_file_path = f'{script_directory}/../templates/pipeline_task/dag_pipeline-{etl_pipeline_id}.yml'
        #manifest = Path(local_file_path).read_text()

        config = Configuration(host=argo_server_url)
        client = ApiClient(configuration=config)
        service = WorkflowServiceApi(api_client=client)

        with open(local_file_path) as f:
            manifest: dict = yaml.safe_load(f)

        if 'spec' in manifest and 'serviceAccountName' in manifest['spec']:
            del manifest['spec']['serviceAccountName']
        service.create_workflow('argo', V1alpha1WorkflowCreateRequest(workflow=manifest))
        workflow_id = manifest.get('metadata')['name']

        return "", f"{argo_server_url}/workflows/argo/{workflow_id}?tab=workflow"

查看argo可以看到编排的作业