一、任务
周五,碰到一位同事,他要从一大堆文件夹中找出90多个文件。
我在想,这个用opencode来试试,看看效果如何?
说明,我用的是deepseek v4 pro.
构建的prompt如下:
请帮我写一个python函数,签名是get_filter_files。输入参数3个,一个是文件夹路径directory,一个类型file_types,默认为"docx";
一个是flags,默认为""。功能是,get_filter_files函数能取出directory路径下(包括各层子文件)中相应file_types,比如后缀为"docx",也可以同时包括多个类型,比如"docx","xlsx",
且文件名包含flags的字符的文件名。这个函数写好后,文件命名为files.py,放到桌面上。
同时在桌面生成一个test_dir文件夹,用于测试这个函数的准确性。要求test_dir里面有3-10个子文件夹,子文件夹有随机1-3个子文件夹。此外,各生成50个docx\xlsx\ppt文件,文件名字以均以数字命名,这些150文件,随机存在里面的不同层级的文件夹中。并写一个测试用例,测试文件为files_test.py,用于说明这个函数能够通过测试,可用于生产。

很显然:
python
def get_filter_files(directory, file_types =["docx"],flag =[""]):
    # Signature sketch only: the body is intentionally empty and returns None.
    # NOTE(review): both defaults are list objects created once at definition
    # time; that is harmless while the body is `pass`, but a real
    # implementation should default to None and build the lists inside.
    pass
另外,让他帮我生成我需要的测试文件,并进行相关的测试。
很快,opencode交付了相应的结果,花费了0.02元。
也许您会问,为什么要这样大费周章,生成一个files.py还有files_test.py?
对了,我就是要看看代码交付的质量。
二、交付结果
files.py如下:
python
import os
def get_filter_files(directory, file_types=None, flags=None):
    """Recursively collect files under ``directory`` by extension and name.

    Args:
        directory: Root folder to walk; every nested sub-folder is visited.
        file_types: One extension or an iterable of extensions, e.g.
            ``"docx"`` or ``[".docx", "XLSX"]``. A leading dot, surrounding
            whitespace and letter case are ignored. Defaults to ``["docx"]``.
        flags: One substring or an iterable of substrings. A file is kept
            when its name contains at least one of them. ``None``, an empty
            collection, or any empty-string entry disables the name filter.

    Returns:
        A list of ``os.path.join(root, filename)`` paths in ``os.walk`` order.
        The list is empty when nothing matches or nothing can be walked.
    """
    if file_types is None:
        file_types = ["docx"]
    elif isinstance(file_types, str):
        # A bare string would otherwise be iterated character by character
        # ("docx" -> "d", "o", "c", "x") and match nothing.
        file_types = [file_types]
    if isinstance(flags, str):
        # `"" in "abc"` is True for any string, so a bare string used to
        # switch the name filter off silently; wrap it instead.
        flags = [flags]
    flags = list(flags) if flags else [""]

    # Set membership keeps the per-file extension check O(1).
    wanted = {ft.strip().lstrip(".").lower() for ft in file_types}
    name_filter_off = "" in flags

    result = []
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Text after the last dot; names without a dot have no extension.
            ext = filename.rsplit(".", 1)[-1] if "." in filename else ""
            if ext.lower() not in wanted:
                continue
            if not name_filter_off and not any(flag in filename for flag in flags):
                continue
            result.append(os.path.join(root, filename))
    return result
files_test.py如下:
python
import os
import sys
import random
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from files import get_filter_files
TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_dir")
def check_duplicates(paths):
    """Return True when the path list contains no repeated entries."""
    unique_paths = set(paths)
    return len(unique_paths) == len(paths)
def total_files():
    """Return the number of files found anywhere under TEST_DIR."""
    return sum(len(names) for _root, _dirs, names in os.walk(TEST_DIR))
def test_default_docx():
    """Default arguments select the docx files only."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50, f"expected 50 docx, got {len(found)}"
    for path in found:
        assert path.lower().endswith(".docx")
    assert check_duplicates(found)
def test_default_returns_docx_only():
    """Passing file_types=["docx"] explicitly selects the docx files."""
    found = get_filter_files(TEST_DIR, file_types=["docx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
def test_single_type_xlsx():
    """Filtering on xlsx returns the xlsx files only."""
    found = get_filter_files(TEST_DIR, file_types=["xlsx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".xlsx")
def test_single_type_pptx():
    """Filtering on pptx returns the pptx files only."""
    found = get_filter_files(TEST_DIR, file_types=["pptx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".pptx")
def test_multiple_types():
    """Several extensions can be requested in one call."""
    wanted_types = ["docx", "xlsx", "pptx"]
    found = get_filter_files(TEST_DIR, file_types=wanted_types)
    assert len(found) == 150
    assert check_duplicates(found)
def test_extension_with_dot():
    """Extensions written with a leading dot are accepted too."""
    found = get_filter_files(TEST_DIR, file_types=[".docx", ".xlsx"])
    assert len(found) == 100
    allowed_suffixes = (".docx", ".xlsx")
    for path in found:
        assert path.lower().endswith(allowed_suffixes)
def test_flag_number_filter():
    """A digit flag keeps only docx files whose name contains that digit."""
    result = get_filter_files(TEST_DIR, file_types=["docx"], flags=["1"])
    # Without this guard the loop below passes vacuously on an empty result.
    assert result, "expected at least one docx file whose name contains '1'"
    for p in result:
        name = os.path.basename(p)
        assert "1" in name
        assert p.lower().endswith(".docx")
def test_flag_exact_match():
    """A full file name used as the flag selects exactly that file."""
    found = get_filter_files(TEST_DIR, file_types=["xlsx"], flags=["25.xlsx"])
    assert len(found) == 1
    only_path = found[0]
    assert os.path.basename(only_path) == "25.xlsx"
def test_multiple_flags():
    """With several flags a file is kept when it matches any one of them."""
    result = get_filter_files(TEST_DIR, file_types=["pptx"], flags=["1", "2"])
    # Without this guard the loop below passes vacuously on an empty result.
    assert result, "expected at least one pptx file whose name contains '1' or '2'"
    for p in result:
        name = os.path.basename(p)
        assert "1" in name or "2" in name
        assert p.lower().endswith(".pptx")
def test_no_match_flag():
    """A flag that matches no file name yields an empty list."""
    found = get_filter_files(
        TEST_DIR,
        file_types=["docx"],
        flags=["zzzz_not_exist"],
    )
    assert found == []
def test_inexistent_directory():
    """A missing directory yields an empty list instead of raising."""
    missing_dir = "C:\\__no_such_dir__"
    found = get_filter_files(missing_dir, file_types=["docx"])
    assert found == []
def test_empty_directory():
    """An empty directory yields an empty list."""
    empty_dir = os.path.join(TEST_DIR, "_empty_test")
    os.makedirs(empty_dir, exist_ok=True)
    try:
        result = get_filter_files(empty_dir, file_types=["docx"])
        assert result == []
    finally:
        # Remove the scratch folder even when the assertion fails, so a
        # failed run does not leave an extra directory inside TEST_DIR.
        os.rmdir(empty_dir)
def test_no_args_uses_defaults():
    """Omitting the optional arguments falls back to the docx default."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
def test_call_count_consistency():
    """Two identical calls return identical results."""
    wanted_types = ["docx", "xlsx"]
    first_run = get_filter_files(TEST_DIR, file_types=["docx", "xlsx"])
    second_run = get_filter_files(TEST_DIR, file_types=wanted_types)
    assert first_run == second_run
def test_paths_are_absolute():
    """Every returned path is absolute."""
    found = get_filter_files(TEST_DIR, file_types=["docx"])
    assert all(os.path.isabs(path) for path in found)
def test_case_insensitive_extension():
    """The requested extension is matched case-insensitively."""
    upper_hits = get_filter_files(TEST_DIR, file_types=["DOCX"])
    lower_hits = get_filter_files(TEST_DIR, file_types=["docx"])
    assert len(upper_hits) == len(lower_hits)
    assert len(lower_hits) == 50
def test_edge_case_empty_flags_list():
    """An empty flags list keeps every file of the requested type."""
    no_flags = []
    found = get_filter_files(TEST_DIR, file_types=["docx"], flags=no_flags)
    assert len(found) == 50
def test_total_file_count():
    """The fixture tree holds exactly 150 files."""
    expected_total = 150
    assert total_files() == expected_total
if __name__ == "__main__":
    # Minimal stand-alone runner: execute each test, report PASS/FAIL, and
    # exit with status 1 when anything failed.
    print(f"Test directory: {TEST_DIR}")
    print(f"Total files in test_dir: {total_files()}")
    print("-" * 50)
    # Each function's __name__ is the label printed for it.
    tests = [
        test_default_docx,
        test_default_returns_docx_only,
        test_single_type_xlsx,
        test_single_type_pptx,
        test_multiple_types,
        test_extension_with_dot,
        test_flag_number_filter,
        test_flag_exact_match,
        test_multiple_flags,
        test_no_match_flag,
        test_inexistent_directory,
        test_empty_directory,
        test_no_args_uses_defaults,
        test_call_count_consistency,
        test_paths_are_absolute,
        test_case_insensitive_extension,
        test_edge_case_empty_flags_list,
        test_total_file_count,
    ]
    passed = 0
    failed = 0
    for fn in tests:
        name = fn.__name__
        try:
            fn()
            print(f"[PASS] {name}")
            passed += 1
        except Exception as e:
            # AssertionError is an Exception, so failed asserts land here.
            print(f"[FAIL] {name}: {e}")
            failed += 1
    print("-" * 50)
    print(f"Results: {passed} passed, {failed} failed out of {len(tests)}")
    if failed:
        sys.exit(1)
三、评估
从测试用例上看,它给出的用例比我预想的要多。代码看起来没啥问题。
我以我自己的数据为例,构建了两个实例试了下:
实例1:
python
# Example 1: list every .docx file under the folder.
directory = r"C:\Users\songroom\Desktop\My"
file_types = ["docx"]
# A single empty-string flag means "no file-name filtering".
flags =[""]
outputs = get_filter_files(directory, file_types, flags)
# Print the number of matches, then one path per line.
print(f"{len(outputs)}")
for file in outputs:
    print(file)
实例2:
python
# Example 2: list the .docx files whose name contains at least one digit.
directory = r"C:\Users\songroom\Desktop\My"
file_types = ["docx"]
# A file is kept when its name contains any one of these substrings.
flags =["1","2","3","4","5","6","7","8","9","0"]
outputs = get_filter_files(directory, file_types, flags)
# Print the number of matches, then one path per line.
print(f"{len(outputs)}")
for file in outputs:
    print(file)
也进行了正常的过滤,把所有带数字的word文件过滤出来了。
总体上看,opencode + deepseek v4 pro的表现还是可以的。
四、优化
上面的函数只输出了相应文件的路径,并没有完成文件的复制,即没有把符合条件的文件复制到目标文件夹。因此,还需要优化:
prompt:
我希望把上面的函数进行改造,如get_filter_files(source_directory, file_types=None,
flags=None,target_dir),即把source_directory的文件,过滤后,复制到target_dir,如果target_dir不存在,就创建。请帮我优化一下。

python
import os
import shutil
def get_filter_files(source_directory, file_types=None, flags=None, target_dir=None):
    """Find files by extension and name and optionally copy them to a folder.

    Args:
        source_directory: Root folder to walk; every nested sub-folder is
            visited.
        file_types: One extension or an iterable of extensions, e.g.
            ``"docx"`` or ``[".docx", "XLSX"]``. A leading dot, surrounding
            whitespace and letter case are ignored. Defaults to ``["docx"]``.
        flags: One substring or an iterable of substrings. A file is kept
            when its name contains at least one of them. ``None``, an empty
            collection, or any empty-string entry disables the name filter.
        target_dir: When given, the folder is created if needed and every
            match is copied into it (flat, metadata preserved by ``copy2``).
            Files already in ``target_dir`` with the same name are
            overwritten. Two matches from the same call that share a file
            name are both kept: later ones are stored as ``name_1.ext``,
            ``name_2.ext`` and so on. ``target_dir`` itself is never scanned.

    Returns:
        A list of the matching source paths in ``os.walk`` order.
    """
    if file_types is None:
        file_types = ["docx"]
    elif isinstance(file_types, str):
        # A bare string would otherwise be iterated character by character.
        file_types = [file_types]
    if isinstance(flags, str):
        # `"" in "abc"` is True for any string, so a bare string used to
        # switch the name filter off silently; wrap it instead.
        flags = [flags]
    flags = list(flags) if flags else [""]

    wanted = {ft.strip().lstrip(".").lower() for ft in file_types}
    name_filter_off = "" in flags
    target_key = None
    if target_dir is not None:
        target_key = os.path.normcase(os.path.abspath(target_dir))

    result = []
    for root, dirs, files in os.walk(source_directory):
        if target_key is not None:
            # Prune the output folder in place so earlier copies are not
            # picked up again when target_dir lives inside the source tree.
            dirs[:] = [
                d for d in dirs
                if os.path.normcase(os.path.abspath(os.path.join(root, d))) != target_key
            ]
        for filename in files:
            # Text after the last dot; names without a dot have no extension.
            ext = filename.rsplit(".", 1)[-1] if "." in filename else ""
            if ext.lower() not in wanted:
                continue
            if not name_filter_off and not any(flag in filename for flag in flags):
                continue
            result.append(os.path.join(root, filename))

    if target_dir is not None:
        os.makedirs(target_dir, exist_ok=True)
        used_names = set()  # destination names already written by this call
        for src_path in result:
            stem, ext = os.path.splitext(os.path.basename(src_path))
            dest_name = stem + ext
            suffix = 0
            # Same-named files from different sub-folders must not clobber
            # one another in the flat target folder.
            while os.path.normcase(dest_name) in used_names:
                suffix += 1
                dest_name = f"{stem}_{suffix}{ext}"
            used_names.add(os.path.normcase(dest_name))
            shutil.copy2(src_path, os.path.join(target_dir, dest_name))
    return result
更新后的files_test.py:
python
import os
import sys
import shutil
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from files import get_filter_files
TEST_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_dir")
TARGET_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_output")
def cleanup_target():
    """Delete TARGET_DIR and everything in it, if it is present."""
    if not os.path.exists(TARGET_DIR):
        return
    shutil.rmtree(TARGET_DIR)
def check_duplicates(paths):
    """Return True when the path list contains no repeated entries."""
    unique_paths = set(paths)
    return len(unique_paths) == len(paths)
def total_files():
    """Return the number of files found anywhere under TEST_DIR."""
    return sum(len(names) for _root, _dirs, names in os.walk(TEST_DIR))
# ---- 原有测试:仅返回路径 ----
def test_default_docx():
    """Default arguments select the docx files only, without duplicates."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
    assert check_duplicates(found)
def test_single_type_xlsx():
    """Filtering on xlsx returns the xlsx files only."""
    found = get_filter_files(TEST_DIR, file_types=["xlsx"])
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".xlsx")
def test_multiple_types():
    """Several extensions can be requested in one call."""
    wanted_types = ["docx", "xlsx", "pptx"]
    found = get_filter_files(TEST_DIR, file_types=wanted_types)
    assert len(found) == 150
def test_extension_with_dot():
    """Extensions written with a leading dot are accepted too."""
    dotted_types = [".docx", ".xlsx"]
    found = get_filter_files(TEST_DIR, file_types=dotted_types)
    assert len(found) == 100
def test_flag_filter():
    """A full file name used as the flag selects exactly that file."""
    found = get_filter_files(TEST_DIR, file_types=["xlsx"], flags=["25.xlsx"])
    assert len(found) == 1
    only_path = found[0]
    assert os.path.basename(only_path) == "25.xlsx"
def test_no_match_flag():
    """A flag that matches no file name yields an empty list."""
    found = get_filter_files(
        TEST_DIR,
        file_types=["docx"],
        flags=["zzzz_not_exist"],
    )
    assert found == []
def test_inexistent_directory():
    """A missing directory yields an empty list instead of raising."""
    missing_dir = "C:\\__no_such_dir__"
    found = get_filter_files(missing_dir)
    assert found == []
def test_no_args_uses_defaults():
    """Omitting the optional arguments falls back to the docx default."""
    found = get_filter_files(TEST_DIR)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
def test_total_file_count():
    """The fixture tree holds exactly 150 files."""
    expected_total = 150
    assert total_files() == expected_total
# ---- 新增测试:target_dir 复制 ----
def test_target_dir_creates_and_copies():
    """A missing target_dir is created and receives every matching file."""
    cleanup_target()
    assert not os.path.exists(TARGET_DIR)
    found = get_filter_files(TEST_DIR, file_types=["docx"], target_dir=TARGET_DIR)
    assert len(found) == 50
    assert os.path.exists(TARGET_DIR)
    copied_names = os.listdir(TARGET_DIR)
    assert len(copied_names) == 50
    for copied_name in copied_names:
        assert copied_name.endswith(".docx")
def test_target_dir_none_returns_only():
    """With target_dir=None the call only reports existing source paths."""
    found = get_filter_files(TEST_DIR, file_types=["docx"], target_dir=None)
    assert len(found) == 50
    for path in found:
        assert path.lower().endswith(".docx")
    assert all(os.path.exists(path) for path in found)
def test_target_dir_multiple_types():
    """Copying two file types places 50 files of each in the target folder."""
    cleanup_target()
    found = get_filter_files(
        TEST_DIR, file_types=["docx", "xlsx"], target_dir=TARGET_DIR
    )
    assert len(found) == 100
    copied_names = os.listdir(TARGET_DIR)
    assert len(copied_names) == 100
    docx_names = [n for n in copied_names if n.endswith(".docx")]
    xlsx_names = [n for n in copied_names if n.endswith(".xlsx")]
    assert len(docx_names) == 50
    assert len(xlsx_names) == 50
def test_target_dir_with_flags():
    """Only files matching both the type and the flag are copied."""
    cleanup_target()
    result = get_filter_files(TEST_DIR, file_types=["docx"], flags=["1"], target_dir=TARGET_DIR)
    copied = os.listdir(TARGET_DIR)
    # Guard against a vacuous pass: something must have been copied, and the
    # copy count must agree with what the function reported.
    assert copied, "expected at least one docx file whose name contains '1'"
    assert len(copied) == len(result)
    for f in copied:
        assert "1" in f
        assert f.endswith(".docx")
def test_target_dir_overwrite():
    """A file already in target_dir is replaced by the copied source file."""
    cleanup_target()
    os.makedirs(TARGET_DIR)
    dummy = os.path.join(TARGET_DIR, "1.docx")
    with open(dummy, "w") as f:
        f.write("old_content")
    assert os.path.getsize(dummy) > 0
    get_filter_files(TEST_DIR, file_types=["docx"], flags=["1.docx"], target_dir=TARGET_DIR)
    assert os.path.exists(dummy)
    # Existence alone does not prove an overwrite: the placeholder text must
    # be gone. Assumes the fixture's 1.docx does not itself hold this text.
    with open(dummy, "rb") as f:
        assert f.read() != b"old_content", "1.docx was not overwritten"
def test_return_value_consistent():
    """The returned list is the same with or without a target_dir."""
    cleanup_target()
    wanted_types = ["docx", "xlsx"]
    with_copy = get_filter_files(TEST_DIR, file_types=["docx", "xlsx"], target_dir=TARGET_DIR)
    without_copy = get_filter_files(TEST_DIR, file_types=wanted_types)
    assert with_copy == without_copy
if __name__ == "__main__":
    # Minimal stand-alone runner: execute each test, report PASS/FAIL, remove
    # the copy output folder, and exit with status 1 when anything failed.
    print(f"Test directory: {TEST_DIR}")
    print(f"Total files in test_dir: {total_files()}")
    # Each function's __name__ is the label printed for it.
    tests = [
        test_default_docx,
        test_single_type_xlsx,
        test_multiple_types,
        test_extension_with_dot,
        test_flag_filter,
        test_no_match_flag,
        test_inexistent_directory,
        test_no_args_uses_defaults,
        test_total_file_count,
        test_target_dir_creates_and_copies,
        test_target_dir_none_returns_only,
        test_target_dir_multiple_types,
        test_target_dir_with_flags,
        test_target_dir_overwrite,
        test_return_value_consistent,
    ]
    passed = 0
    failed = 0
    for fn in tests:
        name = fn.__name__
        try:
            fn()
            print(f"[PASS] {name}")
            passed += 1
        except Exception as e:
            # AssertionError is an Exception, so failed asserts land here.
            print(f"[FAIL] {name}: {e}")
            failed += 1
    cleanup_target()
    print("-" * 50)
    print(f"Results: {passed} passed, {failed} failed out of {len(tests)}")
    if failed:
        sys.exit(1)
以上总共花费0.03元。
五、最后:AI模式下安全生产
大模型是一个黑箱产品,只输入prompt就直接使用结果,心里还是没底;自己再验证一遍,会更放心一点。
但更为重要的是,在生产环境中,出于对网络安全的考虑,我们不能直接让opencode这类agent运行,而是需要让它在隔离的沙箱环境下生成相应的可信代码,我们再把代码放到生产环境中。
另外,deepseek 性价比还真是高。
AI下效率优先的前提是安全生产。