opencode: 工程测试、效率优先和安全生产

一、任务

周五，碰到同事，他要从一大堆文件夹中找出90多个文件。

我在想，这个用opencode来试试，看看效果如何？

说明，我用的是deepseek v4 pro.

构建的prompt如下：

请帮我写一个python函数，签名是get_filter_files。输入参数3个，一个是文件夹路径directory，一个类型file_types,默认为"docx";

一个是flags,默认为""。功能是，get_filter_files函数能取出directory路径下（包括各层子文件）中相应file_types，比如后缀为"docx"，也可以同时包括多个类型，比如"docx","xlsx",

且文件名包含flags的字符的文件名。这个函数写好后，文件命名为files.py,放到桌面上。

同时在桌面生成一个test_dir文件夹，用于测试这个函数的准确性。要求test_dir里面有3-10个子文件夹，子文件夹有随机1-3个子文件夹。此外，各生成50个docx\xlsx\ppt文件，文件名字以均以数字命名，这些150文件，随机存在里面的不同层级的文件夹中。并写一个测试用例，测试文件为files_test.py，用于说明这个函数能够通过测试，可用于生产。

很显然：

python 复制代码

def get_filter_files(directory, file_types=["docx"], flag=[""]):
    # Signature sketch only: the body is intentionally empty, so the list
    # defaults are never mutated here.
    pass

另外，让他帮我生成我需要的测试文件，并进行相关的测试。

很快，opencode交付了相应的结果，花费了0.02元。

也许您会问，为什么要这样大费周章，生成一个files.py还有files_test.py？

对了，我就是要看看代码的交付的质量。

二、交付结果

files.py如下：

python 复制代码

import os

def get_filter_files(directory, file_types=None, flags=None):
    """Recursively collect files under ``directory`` by extension and name.

    Args:
        directory: Root folder to walk; every nested sub-folder is visited.
        file_types: One extension (a plain string) or an iterable of
            extensions to keep. A leading dot, surrounding whitespace and
            letter case are ignored. Defaults to ``["docx"]``.
        flags: One substring (a plain string) or an iterable of substrings;
            a file is kept when its name contains at least one of them.
            ``None``, an empty collection or an empty-string entry disables
            the name filter.

    Returns:
        A list of ``os.path.join(root, filename)`` paths in ``os.walk``
        order. A directory that cannot be listed yields no entries because
        ``os.walk`` ignores listing errors by default.
    """
    if file_types is None:
        file_types = ["docx"]
    elif isinstance(file_types, str):
        # A bare string would otherwise be iterated character by character.
        file_types = [file_types]
    if isinstance(flags, str):
        flags = [flags]
    flags = list(flags) if flags else [""]

    wanted = {ft.strip().lstrip(".").lower() for ft in file_types}
    # The empty string is contained in every name, so it means "no filter".
    match_any_name = "" in flags

    result = []
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            ext = filename.rsplit(".", 1)[-1] if "." in filename else ""
            if ext.lower() not in wanted:
                continue
            if not match_any_name and not any(flag in filename for flag in flags):
                continue
            result.append(os.path.join(root, filename))

    return result

files_test.py如下：

python 复制代码

import os
import sys
import random

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from files import get_filter_files

TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_dir")


def check_duplicates(paths):
    """Return True when no path occurs more than once in ``paths``."""
    unique_paths = set(paths)
    return len(unique_paths) == len(paths)


def total_files():
    """Return the number of files found at any depth under TEST_DIR."""
    return sum(len(names) for _root, _dirs, names in os.walk(TEST_DIR))


def test_default_docx():
    """Default arguments: only docx files are returned, 50 of them, no repeats."""
    result = get_filter_files(TEST_DIR)
    assert len(result) == 50, f"expected 50 docx, got {len(result)}"
    lowered = [path.lower() for path in result]
    assert all(path.endswith(".docx") for path in lowered)
    assert check_duplicates(result)


def test_default_returns_docx_only():
    """Passing file_types=["docx"] explicitly yields the 50 docx files."""
    found = get_filter_files(TEST_DIR, file_types=["docx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")


def test_single_type_xlsx():
    """Filtering on xlsx returns exactly the 50 xlsx files."""
    found = get_filter_files(TEST_DIR, file_types=["xlsx"])
    assert len(found) == 50
    wrong_suffix = [path for path in found if not path.lower().endswith(".xlsx")]
    assert not wrong_suffix


def test_single_type_pptx():
    """Filtering on pptx returns exactly the 50 pptx files."""
    found = get_filter_files(TEST_DIR, file_types=["pptx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".pptx")


def test_multiple_types():
    """Several extensions at once: all 150 files come back, none repeated."""
    wanted = ["docx", "xlsx", "pptx"]
    found = get_filter_files(TEST_DIR, file_types=wanted)
    assert len(found) == 150
    assert check_duplicates(found)


def test_extension_with_dot():
    """Extensions written with a leading dot are accepted as well."""
    suffixes = (".docx", ".xlsx")
    found = get_filter_files(TEST_DIR, file_types=[".docx", ".xlsx"])
    assert len(found) == 100
    for path in found:
        assert path.lower().endswith(suffixes)


def test_flag_number_filter():
    """Flag filter: every returned docx file has "1" in its name."""
    result = get_filter_files(TEST_DIR, file_types=["docx"], flags=["1"])
    # Without this guard the loop below passes vacuously on an empty result.
    # NOTE(review): assumes the fixture has at least one docx whose name
    # contains "1" -- confirm against the generated test_dir.
    assert result
    for p in result:
        name = os.path.basename(p)
        assert "1" in name
        assert p.lower().endswith(".docx")

def test_flag_exact_match():
    """A flag equal to a full file name selects exactly that one file."""
    matches = get_filter_files(TEST_DIR, file_types=["xlsx"], flags=["25.xlsx"])
    assert len(matches) == 1
    only_path = matches[0]
    assert os.path.basename(only_path) == "25.xlsx"


def test_multiple_flags():
    """Several flags: each returned pptx name contains at least one of them."""
    result = get_filter_files(TEST_DIR, file_types=["pptx"], flags=["1", "2"])
    # Without this guard the loop below passes vacuously on an empty result.
    # NOTE(review): assumes the fixture has pptx names containing "1" or
    # "2" -- confirm against the generated test_dir.
    assert result
    for p in result:
        name = os.path.basename(p)
        assert "1" in name or "2" in name
        assert p.lower().endswith(".pptx")


def test_no_match_flag():
    """A flag that matches no file name produces an empty list."""
    unmatched_flag = "zzzz_not_exist"
    found = get_filter_files(TEST_DIR, file_types=["docx"], flags=[unmatched_flag])
    assert found == []


def test_inexistent_directory():
    """A directory that does not exist gives an empty list, not an exception."""
    missing_dir = "C:\\__no_such_dir__"
    found = get_filter_files(missing_dir, file_types=["docx"])
    assert found == []


def test_empty_directory():
    """An empty directory produces an empty list."""
    empty_dir = os.path.join(TEST_DIR, "_empty_test")
    os.makedirs(empty_dir, exist_ok=True)
    try:
        result = get_filter_files(empty_dir, file_types=["docx"])
        assert result == []
    finally:
        # Remove the scratch folder even when the assertion fails, so a
        # failed run does not leave it behind inside the fixture tree.
        os.rmdir(empty_dir)


def test_no_args_uses_defaults():
    """Calling with only the directory falls back to the docx default."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")


def test_call_count_consistency():
    """Two identical calls return identical results."""
    wanted = ["docx", "xlsx"]
    first_run = get_filter_files(TEST_DIR, file_types=wanted)
    second_run = get_filter_files(TEST_DIR, file_types=wanted)
    assert first_run == second_run


def test_paths_are_absolute():
    """Every returned path is an absolute path."""
    found = get_filter_files(TEST_DIR, file_types=["docx"])
    assert all(os.path.isabs(path) for path in found)


def test_case_insensitive_extension():
    """The extension filter ignores letter case."""
    upper_count = len(get_filter_files(TEST_DIR, file_types=["DOCX"]))
    lower_count = len(get_filter_files(TEST_DIR, file_types=["docx"]))
    assert upper_count == lower_count == 50


def test_edge_case_empty_flags_list():
    """An empty flags list keeps every file of the requested type."""
    no_flags = []
    found = get_filter_files(TEST_DIR, file_types=["docx"], flags=no_flags)
    assert len(found) == 50


def test_total_file_count():
    """The fixture directory holds exactly 150 files in total."""
    file_count = total_files()
    assert file_count == 150


if __name__ == "__main__":
    # Minimal stand-alone runner: execute every test in order, print one
    # PASS/FAIL line per test, and exit with status 1 if any test failed.
    separator = "-" * 50
    print(f"Test directory: {TEST_DIR}")
    print(f"Total files in test_dir: {total_files()}")
    print(separator)

    # The printed label of each test is the function's own name.
    tests = [
        (fn.__name__, fn)
        for fn in (
            test_default_docx,
            test_default_returns_docx_only,
            test_single_type_xlsx,
            test_single_type_pptx,
            test_multiple_types,
            test_extension_with_dot,
            test_flag_number_filter,
            test_flag_exact_match,
            test_multiple_flags,
            test_no_match_flag,
            test_inexistent_directory,
            test_empty_directory,
            test_no_args_uses_defaults,
            test_call_count_consistency,
            test_paths_are_absolute,
            test_case_insensitive_extension,
            test_edge_case_empty_flags_list,
            test_total_file_count,
        )
    ]

    failed = 0
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            print(f"[FAIL] {name}: {e}")
            failed += 1
        else:
            print(f"[PASS] {name}")
    passed = len(tests) - failed

    print(separator)
    print(f"Results: {passed} passed, {failed} failed out of {len(tests)}")
    if failed > 0:
        sys.exit(1)

三、评估

从测试用例上，这个比我想的案例要多。代码看起来没啥问题。

我以我自己的数据为例，构建了两个实例试了下：

实例1：

python 复制代码

# Example 1: list every docx file under the folder; the empty-string flag
# applies no file-name filter.
directory = r"C:\Users\songroom\Desktop\My"
file_types = ["docx"]
flags =[""]
outputs = get_filter_files(directory, file_types, flags)
# Print the number of matches first, then one path per line.
print(f"{len(outputs)}")
for file in outputs:
    print(file)

实例2：

python 复制代码

# Example 2: keep only docx files whose name contains at least one digit.
directory = r"C:\Users\songroom\Desktop\My"
file_types = ["docx"]
flags =["1","2","3","4","5","6","7","8","9","0"]
outputs = get_filter_files(directory, file_types, flags)
# Print the number of matches first, then one path per line.
print(f"{len(outputs)}")
for file in outputs:
    print(file)

也进行了正常的过滤，把所有带数字的word文件过滤出来了。

总体上看，opencode + deepseek v4 pro 的表现还是可以的。

四、优化

上面只输出了相应文件的路径，并没有完成文件的复制，即没有生成新的文件，也没有把符合条件的文件复制到目标文件夹。因此，还需要优化：

prompt:

我希望把上面的函数进行改造，如get_filter_files(source_directory, file_types=None,

flags=None,target_dir)，即把source_directory的文件，过滤后，复制到target_dir,如果target_dir不存在，就创建。请帮我优化一下。

优化的files.py

python 复制代码

import os
import shutil


def get_filter_files(source_directory, file_types=None, flags=None, target_dir=None):
    """Collect matching files under ``source_directory`` and optionally copy them.

    Args:
        source_directory: Root folder to walk; every nested sub-folder is
            visited.
        file_types: One extension (a plain string) or an iterable of
            extensions to keep. A leading dot, surrounding whitespace and
            letter case are ignored. Defaults to ``["docx"]``.
        flags: One substring (a plain string) or an iterable of substrings;
            a file is kept when its name contains at least one of them.
            ``None``, an empty collection or an empty-string entry disables
            the name filter.
        target_dir: When given, the folder is created if needed and every
            match is copied into it with ``shutil.copy2``. A file already
            present in the folder under the same name is overwritten. When
            two matches of the same call share a base name, the later ones
            are stored as ``<stem>_1<ext>``, ``<stem>_2<ext>``, ... so that
            neither copy is lost.

    Returns:
        The list of matched source paths (``os.path.join(root, filename)``)
        in ``os.walk`` order, whether or not anything was copied.
    """
    if file_types is None:
        file_types = ["docx"]
    elif isinstance(file_types, str):
        # A bare string would otherwise be iterated character by character.
        file_types = [file_types]
    if isinstance(flags, str):
        flags = [flags]
    flags = list(flags) if flags else [""]

    wanted = {ft.strip().lstrip(".").lower() for ft in file_types}
    # The empty string is contained in every name, so it means "no filter".
    match_any_name = "" in flags

    result = []
    for root, _dirs, files in os.walk(source_directory):
        for filename in files:
            ext = filename.rsplit(".", 1)[-1] if "." in filename else ""
            if ext.lower() not in wanted:
                continue
            if not match_any_name and not any(flag in filename for flag in flags):
                continue
            result.append(os.path.join(root, filename))

    if target_dir is not None:
        os.makedirs(target_dir, exist_ok=True)
        # Names written during this call, normalised so that names differing
        # only by case also count as clashes on case-insensitive systems.
        used_names = set()
        for src_path in result:
            stem, suffix = os.path.splitext(os.path.basename(src_path))
            candidate = stem + suffix
            counter = 0
            while os.path.normcase(candidate) in used_names:
                counter += 1
                candidate = f"{stem}_{counter}{suffix}"
            used_names.add(os.path.normcase(candidate))
            try:
                shutil.copy2(src_path, os.path.join(target_dir, candidate))
            except shutil.SameFileError:
                # The source already is the destination file (target_dir
                # lies inside source_directory); nothing to copy.
                continue

    return result

更新后的files_test.py:

python 复制代码

import os
import sys
import shutil

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from files import get_filter_files

TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_dir")
TARGET_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_output")


def cleanup_target():
    """Delete TARGET_DIR together with its contents when it exists."""
    if not os.path.exists(TARGET_DIR):
        return
    shutil.rmtree(TARGET_DIR)


def check_duplicates(paths):
    """Return True when ``paths`` contains no repeated entry."""
    distinct = set(paths)
    return len(distinct) == len(paths)


def total_files():
    """Return how many files exist at any depth below TEST_DIR."""
    return sum(len(names) for _root, _dirs, names in os.walk(TEST_DIR))


# ---- 原有测试：仅返回路径 ----

def test_default_docx():
    """Default arguments return the 50 docx files without repeats."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
    assert check_duplicates(found)


def test_single_type_xlsx():
    """Filtering on xlsx returns exactly the 50 xlsx files."""
    found = get_filter_files(TEST_DIR, file_types=["xlsx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".xlsx")


def test_multiple_types():
    """Three extensions at once return all 150 files."""
    wanted = ["docx", "xlsx", "pptx"]
    found = get_filter_files(TEST_DIR, file_types=wanted)
    assert len(found) == 150


def test_extension_with_dot():
    """Extensions written with a leading dot are accepted."""
    dotted = [".docx", ".xlsx"]
    found = get_filter_files(TEST_DIR, file_types=dotted)
    assert len(found) == 100


def test_flag_filter():
    """A flag equal to a full file name selects exactly that one file."""
    matches = get_filter_files(TEST_DIR, file_types=["xlsx"], flags=["25.xlsx"])
    assert len(matches) == 1
    only_path = matches[0]
    assert os.path.basename(only_path) == "25.xlsx"


def test_no_match_flag():
    """A flag that matches no file name produces an empty list."""
    unmatched_flag = "zzzz_not_exist"
    found = get_filter_files(TEST_DIR, file_types=["docx"], flags=[unmatched_flag])
    assert found == []


def test_inexistent_directory():
    """A directory that does not exist gives an empty list."""
    missing_dir = "C:\\__no_such_dir__"
    found = get_filter_files(missing_dir)
    assert found == []


def test_no_args_uses_defaults():
    """Calling with only the directory falls back to the docx default."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50
    wrong_suffix = [path for path in found if not path.lower().endswith(".docx")]
    assert not wrong_suffix


def test_total_file_count():
    """The fixture directory holds exactly 150 files in total."""
    file_count = total_files()
    assert file_count == 150


# ---- 新增测试：target_dir 复制 ----

def test_target_dir_creates_and_copies():
    """A missing target folder is created and receives the 50 docx files."""
    cleanup_target()
    assert not os.path.exists(TARGET_DIR)

    found = get_filter_files(TEST_DIR, file_types=["docx"], target_dir=TARGET_DIR)
    assert len(found) == 50
    assert os.path.exists(TARGET_DIR)

    copied_names = os.listdir(TARGET_DIR)
    assert len(copied_names) == 50
    for copied_name in copied_names:
        assert copied_name.endswith(".docx")


def test_target_dir_none_returns_only():
    """With target_dir=None the 50 existing docx paths are simply returned."""
    found = get_filter_files(TEST_DIR, file_types=["docx"], target_dir=None)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
    assert all(os.path.exists(path) for path in found)


def test_target_dir_multiple_types():
    """Copying docx and xlsx together puts 50 of each into the target."""
    cleanup_target()
    found = get_filter_files(TEST_DIR, file_types=["docx", "xlsx"], target_dir=TARGET_DIR)
    assert len(found) == 100
    copied_names = os.listdir(TARGET_DIR)
    assert len(copied_names) == 100
    docx_names = [name for name in copied_names if name.endswith(".docx")]
    xlsx_names = [name for name in copied_names if name.endswith(".xlsx")]
    assert len(docx_names) == 50
    assert len(xlsx_names) == 50


def test_target_dir_with_flags():
    """Only docx files whose name contains "1" are copied to the target."""
    cleanup_target()
    result = get_filter_files(TEST_DIR, file_types=["docx"], flags=["1"], target_dir=TARGET_DIR)
    copied = os.listdir(TARGET_DIR)
    # Guard against a vacuous pass when nothing matched or nothing was copied.
    # NOTE(review): assumes docx base names in test_dir are unique, so every
    # match becomes one file in the target -- confirm against the fixture.
    assert result
    assert len(copied) == len(result)
    for f in copied:
        assert "1" in f
        assert f.endswith(".docx")


def test_target_dir_overwrite():
    """A file already present in the target is replaced by the copied source."""
    cleanup_target()
    os.makedirs(TARGET_DIR)
    dummy = os.path.join(TARGET_DIR, "1.docx")
    with open(dummy, "w") as f:
        f.write("old_content")
    assert os.path.getsize(dummy) > 0

    get_filter_files(TEST_DIR, file_types=["docx"], flags=["1.docx"], target_dir=TARGET_DIR)

    assert os.path.exists(dummy)
    # Existence alone is true even if nothing was copied, so check that the
    # placeholder text is gone.
    # NOTE(review): assumes test_dir contains a file named 1.docx -- confirm
    # against the generated fixture.
    with open(dummy, "rb") as f:
        assert f.read() != b"old_content"


def test_return_value_consistent():
    """The returned list is the same with and without a target folder."""
    cleanup_target()
    wanted = ["docx", "xlsx"]
    with_copy = get_filter_files(TEST_DIR, file_types=wanted, target_dir=TARGET_DIR)
    without_copy = get_filter_files(TEST_DIR, file_types=wanted)
    assert with_copy == without_copy


if __name__ == "__main__":
    # Minimal stand-alone runner: execute every test in order, print one
    # PASS/FAIL line per test, remove the output folder, and exit with
    # status 1 if any test failed.
    print(f"Test directory: {TEST_DIR}")
    print(f"Total files in test_dir: {total_files()}")

    # The printed label of each test is the function's own name.
    tests = [
        (fn.__name__, fn)
        for fn in (
            test_default_docx,
            test_single_type_xlsx,
            test_multiple_types,
            test_extension_with_dot,
            test_flag_filter,
            test_no_match_flag,
            test_inexistent_directory,
            test_no_args_uses_defaults,
            test_total_file_count,
            test_target_dir_creates_and_copies,
            test_target_dir_none_returns_only,
            test_target_dir_multiple_types,
            test_target_dir_with_flags,
            test_target_dir_overwrite,
            test_return_value_consistent,
        )
    ]

    failed = 0
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            print(f"[FAIL] {name}: {e}")
            failed += 1
        else:
            print(f"[PASS] {name}")
    passed = len(tests) - failed

    cleanup_target()
    print("-" * 50)
    print(f"Results: {passed} passed, {failed} failed out of {len(tests)}")
    if failed > 0:
        sys.exit(1)

以上总共花费0.03元。

五、最后：AI模式下安全生产

大模型作为黑箱产品，只输入prompt，感觉心里还是没底，进行自我验证，还是会更放心一点。

但更为重要的是，在生产环境中，出于网络安全的考虑，不能直接让opencode这类agent运行；应当让它在隔离的沙箱环境下生成相应的代码，经验证可信后，我们再把代码放到生产环境中。

另外，deepseek 性价比还真是高。

AI下效率优先的前提是安全生产。