聊聊Spring AI Alibaba的FeiShuDocumentReader

本文主要研究一下Spring AI Alibaba的FeiShuDocumentReader

FeiShuDocumentReader

community/document-readers/spring-ai-alibaba-starter-document-reader-larksuite/src/main/java/com/alibaba/cloud/ai/reader/feishu/FeiShuDocumentReader.java

复制代码
public class FeiShuDocumentReader implements DocumentReader {

	private static final Logger log = LoggerFactory.getLogger(FeiShuDocumentReader.class);

	private final FeiShuResource feiShuResource;

	private final Client client;

	private String documentId;

	private String userAccessToken;

	private String tenantAccessToken;

	public FeiShuDocumentReader(FeiShuResource feiShuResource) {
		this.feiShuResource = feiShuResource;
		this.client = feiShuResource.buildDefaultFeiShuClient();
	}

	public FeiShuDocumentReader(FeiShuResource feiShuResource, String documentId, String userAccessToken,
			String tenantAccessToken) {
		this(feiShuResource);
		this.documentId = documentId;
		this.userAccessToken = userAccessToken;
		this.tenantAccessToken = tenantAccessToken;
	}

	public FeiShuDocumentReader(FeiShuResource feiShuResource, String userAccessToken) {
		this(feiShuResource);
		this.userAccessToken = userAccessToken;
	}

	public FeiShuDocumentReader(FeiShuResource feiShuResource, String userAccessToken, String documentId) {
		this(feiShuResource);
		this.userAccessToken = userAccessToken;
		this.documentId = documentId;
	}

	/**
	 * use tenant_access_token access [tenant identity]
	 * @param documentId documentId
	 * @param userAccessToken userAccessToken
	 * @return String
	 */
	public Document getDocumentContentByUser(String documentId, String userAccessToken) throws Exception {
		RawContentDocumentReq req = RawContentDocumentReq.newBuilder().documentId(documentId).lang(0).build();

		RawContentDocumentResp resp = client.docx()
			.document()
			.rawContent(req, RequestOptions.newBuilder().userAccessToken(userAccessToken).build());
		if (!resp.success()) {
			System.out.printf("code:%s,msg:%s,reqId:%s, resp:%s%n", resp.getCode(), resp.getMsg(), resp.getRequestId(),
					Jsons.createGSON(true, false)
						.toJson(JsonParser
							.parseString(new String(resp.getRawResponse().getBody(), StandardCharsets.UTF_8))));
			throw new Exception(resp.getMsg());
		}

		return toDocument(Jsons.DEFAULT.toJson(resp.getData()));
	}

	/**
	 * use tenant_access_token [tenant identity]
	 * @param documentId documentId
	 * @param tenantAccessToken tenantAccessToken
	 * @return String
	 */
	public Document getDocumentContentByTenant(String documentId, String tenantAccessToken) throws Exception {
		RawContentDocumentReq req = RawContentDocumentReq.newBuilder().documentId(documentId).lang(0).build();

		RawContentDocumentResp resp = client.docx()
			.document()
			.rawContent(req, RequestOptions.newBuilder().tenantAccessToken(tenantAccessToken).build());
		if (!resp.success()) {
			System.out.printf("code:%s,msg:%s,reqId:%s, resp:%s%n", resp.getCode(), resp.getMsg(), resp.getRequestId(),
					Jsons.createGSON(true, false)
						.toJson(JsonParser
							.parseString(new String(resp.getRawResponse().getBody(), StandardCharsets.UTF_8))));
			throw new Exception(resp.getMsg());
		}
		return toDocument(Jsons.DEFAULT.toJson(resp.getData()));
	}

	/**
	 * get document list
	 * @param userAccessToken userAccessToken
	 * @return String
	 */
	public Document getDocumentListByUser(String userAccessToken) throws Exception {
		ListFileReq req = ListFileReq.newBuilder().orderBy("EditedTime").direction("DESC").build();
		ListFileResp resp = client.drive()
			.file()
			.list(req, RequestOptions.newBuilder().userAccessToken(userAccessToken).build());
		if (!resp.success()) {
			System.out.printf("code:%s,msg:%s,reqId:%s, resp:%s%n", resp.getCode(), resp.getMsg(), resp.getRequestId(),
					Jsons.createGSON(true, false)
						.toJson(JsonParser
							.parseString(new String(resp.getRawResponse().getBody(), StandardCharsets.UTF_8))));
			throw new Exception(resp.getMsg());
		}
		return toDocument(Jsons.DEFAULT.toJson(resp.getData()));
	}

	private Document toDocument(String docText) {
		return new Document(docText);
	}

	@Override
	public List<Document> get() {
		List<Document> documents = new ArrayList<>();
		if (this.feiShuResource != null) {
			loadDocuments(documents, this.feiShuResource);
		}
		return documents;
	}

	private void loadDocuments(List<Document> documents, FeiShuResource feiShuResource) {
		String appId = feiShuResource.getAppId();
		String appSecret = feiShuResource.getAppSecret();
		String source = format("feishu://%s/%s", appId, appSecret);
		try {
			documents.add(new Document(source));
			if (this.userAccessToken != null) {
				documents.add(getDocumentListByUser(userAccessToken));
			}
			else {
				log.info("userAccessToken is null");
			}
			if (this.tenantAccessToken != null && this.documentId != null) {
				documents.add(getDocumentContentByTenant(documentId, tenantAccessToken));
			}
			else {
				log.info("tenantAccessToken or documentId is null");
			}
			if (this.userAccessToken != null && this.documentId != null) {
				documents.add(getDocumentContentByUser(documentId, userAccessToken));
			}
			else {
				log.info("userAccessToken or documentId is null");
			}

		}
		catch (Exception e) {
			log.warn("Failed to load an object with appId: {}, appSecret: {},{}", appId, appSecret, e.getMessage(), e);
		}
	}

}

FeiShuDocumentReader的构造器依赖FeiShuResource，并通过feiShuResource.buildDefaultFeiShuClient()创建com.lark.oapi.Client；其get方法通过loadDocuments先添加一个描述来源的Document，再根据userAccessToken读取文档列表，并在documentId不为空时根据tenantAccessToken或userAccessToken读取指定文档的原始内容。

FeiShuResource

community/document-readers/spring-ai-alibaba-starter-document-reader-larksuite/src/main/java/com/alibaba/cloud/ai/reader/feishu/FeiShuResource.java

复制代码
/**
 * {@code Resource} implementation that carries the FeiShu application credentials
 * ({@code appId} and {@code appSecret}).
 *
 * <p>
 * NOTE(review): this listing is abridged (see the {@code //......} marker). Other code
 * in this article calls {@code builder()}, {@code getAppId()}, {@code getAppSecret()}
 * and {@code buildDefaultFeiShuClient()} on this type, so those members presumably live
 * in the elided part — confirm against the full source.
 */
public class FeiShuResource implements Resource {

	// Constant holding the literal "source"; where it is used is not visible in this
	// excerpt.
	public static final String SOURCE = "source";

	// Property prefix string; presumably used for configuration binding — confirm
	// against the elided code.
	public static final String FEISHU_PROPERTIES_PREFIX = "spring.ai.alibaba.plugin.feishu";

	// FeiShu application id.
	private final String appId;

	// FeiShu application secret.
	private final String appSecret;

	//......
}	

FeiShuResource实现了Resource接口，定义了appId、appSecret两个属性，以及SOURCE、FEISHU_PROPERTIES_PREFIX两个常量。

示例

复制代码
/**
 * Integration tests for {@link FeiShuDocumentReader}. They only run when the FeiShu app
 * credentials are available as environment variables; the user token and document id
 * are optional and gate the individual tests that need them.
 */
@EnabledIfEnvironmentVariable(named = "FEISHU_APP_ID", matches = ".+")
@EnabledIfEnvironmentVariable(named = "FEISHU_APP_SECRET", matches = ".+")
public class FeiShuDocumentReaderTest {

	private static final Logger log = LoggerFactory.getLogger(FeiShuDocumentReaderTest.class);

	// Mandatory credentials, read from the environment.
	private static final String FEISHU_APP_ID = System.getenv("FEISHU_APP_ID");

	private static final String FEISHU_APP_SECRET = System.getenv("FEISHU_APP_SECRET");

	// Optional inputs, read from the environment.
	private static final String FEISHU_USER_TOKEN = System.getenv("FEISHU_USER_TOKEN");

	private static final String FEISHU_DOCUMENT_ID = System.getenv("FEISHU_DOCUMENT_ID");

	private FeiShuDocumentReader feiShuDocumentReader;

	private FeiShuResource feiShuResource;

	static {
		boolean credentialsPresent = FEISHU_APP_ID != null && FEISHU_APP_SECRET != null;
		if (!credentialsPresent) {
			System.out
				.println("FEISHU_APP_ID or FEISHU_APP_SECRET environment variable is not set. Tests will be skipped.");
		}
	}

	/** Returns {@code true} when the value is neither {@code null} nor empty. */
	private static boolean isSet(String value) {
		return value != null && !value.isEmpty();
	}

	/** Runs the current reader and logs whatever it returns. */
	private void readAndLog() {
		List<Document> loaded = feiShuDocumentReader.get();
		log.info("result:{}", loaded);
	}

	@BeforeEach
	void setup() {
		// Bail out early when the mandatory credentials are missing.
		Assumptions.assumeTrue(isSet(FEISHU_APP_ID), "Skipping test because FEISHU_APP_ID is not set");
		Assumptions.assumeTrue(isSet(FEISHU_APP_SECRET), "Skipping test because FEISHU_APP_SECRET is not set");

		// Build the resource from the environment-provided credentials.
		feiShuResource = FeiShuResource.builder().appId(FEISHU_APP_ID).appSecret(FEISHU_APP_SECRET).build();
	}

	@Test
	void feiShuDocumentTest() {
		feiShuDocumentReader = new FeiShuDocumentReader(feiShuResource);
		readAndLog();
	}

	@Test
	void feiShuDocumentTestByUserToken() {
		// Needs a user token.
		Assumptions.assumeTrue(isSet(FEISHU_USER_TOKEN), "Skipping test because FEISHU_USER_TOKEN is not set");

		feiShuDocumentReader = new FeiShuDocumentReader(feiShuResource, FEISHU_USER_TOKEN);
		readAndLog();
	}

	@Test
	void feiShuDocumentTestByUserTokenAndDocumentId() {
		// Needs both a user token and a document id.
		Assumptions.assumeTrue(isSet(FEISHU_USER_TOKEN), "Skipping test because FEISHU_USER_TOKEN is not set");
		Assumptions.assumeTrue(isSet(FEISHU_DOCUMENT_ID), "Skipping test because FEISHU_DOCUMENT_ID is not set");

		feiShuDocumentReader = new FeiShuDocumentReader(feiShuResource, FEISHU_USER_TOKEN, FEISHU_DOCUMENT_ID);
		readAndLog();
	}

}

小结

spring-ai-alibaba-starter-document-reader-larksuite提供了FeiShuDocumentReader用于根据userAccessToken或tenantAccessToken读取飞书文档列表或者指定documentId的文档内容。

doc

相关推荐
机器之心2 分钟前
WSDM 25唯一最佳论文:从谱视角揭开推荐系统流行度偏差放大之谜
人工智能
新智元10 分钟前
国产 Vidu Q1 出道即顶流,登顶 VBench!吉卜力、广告大片、科幻特效全包了
人工智能·openai
北漂程序员学习30 分钟前
如何避免被目标网站识别为爬虫?
爬虫·python·scrapy·flask·scipy
人机与认知实验室35 分钟前
宽度学习与深度学习
人工智能·深度学习·学习
新智元38 分钟前
AI 永生时代来临!DeepMind「生成幽灵」让逝者赛博重生
人工智能·openai
HyperAI超神经40 分钟前
【vLLM 学习】Aqlm 示例
java·开发语言·数据库·人工智能·学习·教程·vllm
纪元A梦43 分钟前
华为OD机试真题——数据分类(2025A卷:100分)Java/python/JavaScript/C++/C语言/GO六种最佳实现
java·javascript·c++·python·华为od·go·华为od机试题
cnbestec44 分钟前
欣佰特携数十款机器人相关前沿产品,亮相第二届人形机器人和具身智能行业盛会
人工智能·机器人
爱的叹息1 小时前
关于 梯度下降算法、线性回归模型、梯度下降训练线性回归、线性回归的其他训练算法 以及 回归模型分类 的详细说明
人工智能·算法·回归·线性回归