package cc.mrbird.febs.ai.utils; import com.aliyun.bailian20231229.models.*; import com.aliyun.teautil.models.RuntimeOptions; import com.fasterxml.jackson.databind.ObjectMapper; import java.io.File; import java.io.FileInputStream; import java.net.HttpURLConnection; import java.net.URL; import java.nio.file.Paths; import java.security.MessageDigest; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; public class KnowledgeBaseUtil { private static String ACCESS_KEY_ID = "LTAI5tCyQRwhZ2eimxCFKbdq"; private static String ACCESS_KEY_SECRET = "fs1mEwLXg2j9XuKJsFoW8ThQbJFqHl"; private static String WORKSPACE_ID = "llm-4bcr09yfxlgz0b0t"; private static String ENDPOINT = "bailian.cn-beijing.aliyuncs.com"; /** * description : *

使用凭据初始化账号Client

* @return Client */ public static com.aliyun.bailian20231229.Client createClient() throws Exception { com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config() .setAccessKeyId(ACCESS_KEY_ID) .setAccessKeySecret(ACCESS_KEY_SECRET) .setEndpoint(ENDPOINT); return new com.aliyun.bailian20231229.Client(config); } /** * 添加分类 * @param categoryName 分类名称 * @param parentCategoryId 父分类ID * @return 返回新增分类的ID * @throws Exception 当API调用失败或其他异常情况时抛出 */ public static String AddCategory(String categoryName, String parentCategoryId) throws Exception { // 创建阿里云百炼客户端 com.aliyun.bailian20231229.Client client = KnowledgeBaseUtil.createClient(); // 构造添加分类请求参数 com.aliyun.bailian20231229.models.AddCategoryRequest addCategoryRequest = new com.aliyun.bailian20231229.models.AddCategoryRequest() .setCategoryName(categoryName) .setCategoryType("UNSTRUCTURED") .setParentCategoryId(parentCategoryId); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); java.util.Map headers = new java.util.HashMap<>(); // 调用添加分类API接口 AddCategoryResponse addCategoryResponse = client.addCategoryWithOptions(WORKSPACE_ID, addCategoryRequest, headers, runtime); return addCategoryResponse.getBody().getData().getCategoryId(); } /** * 计算文件的MD5值。 * * @param filePath 文件本地路径 * @return 文件的MD5值 * @throws Exception 如果计算过程中发生错误 */ public static String calculateMD5(String filePath) throws Exception { MessageDigest md = MessageDigest.getInstance("MD5"); try (FileInputStream fis = new FileInputStream(filePath)) { byte[] buffer = new byte[4096]; int bytesRead; while ((bytesRead = fis.read(buffer)) != -1) { md.update(buffer, 0, bytesRead); } } StringBuilder sb = new StringBuilder(); for (byte b : md.digest()) { sb.append(String.format("%02x", b & 0xff)); } return sb.toString(); } /** * 获取文件大小(以字节为单位)。 * * @param filePath 文件本地路径 * @return 文件大小(以字节为单位) */ public static String getFileSize(String filePath) { File file = new File(filePath); long fileSize = file.length(); return String.valueOf(fileSize); } /** * 申请文件上传租约。 * * @param client 客户端对象 * @param categoryId 类目ID * @param fileName 文件名称 * @param fileMd5 文件的MD5值 * @param fileSize 文件大小(以字节为单位) * @param workspaceId 业务空间ID * @return 阿里云百炼服务的响应对象 */ public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId, String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception { Map headers = new HashMap<>(); com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest(); applyFileUploadLeaseRequest.setFileName(fileName); applyFileUploadLeaseRequest.setMd5(fileMd5); applyFileUploadLeaseRequest.setSizeInBytes(fileSize); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null; applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId, applyFileUploadLeaseRequest, headers, runtime); return applyFileUploadLeaseResponse; } /** * 上传文件到临时存储。 * * @param preSignedUrl 上传租约中的 URL * @param headers 上传请求的头部 * @param filePath 文件本地路径 * @throws Exception 如果上传过程中发生错误 */ public static void uploadFile(String preSignedUrl, Map headers, String filePath) throws Exception { File file = new File(filePath); if (!file.exists() || !file.isFile()) { throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath); } try (FileInputStream fis = new FileInputStream(file)) { URL url = new URL(preSignedUrl); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("PUT"); conn.setDoOutput(true); // 设置上传请求头 conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra")); conn.setRequestProperty("Content-Type", headers.get("Content-Type")); // 分块读取并上传文件 byte[] buffer = new byte[4096]; int bytesRead; while ((bytesRead = fis.read(buffer)) != -1) { conn.getOutputStream().write(buffer, 0, bytesRead); } int responseCode = conn.getResponseCode(); if (responseCode != 200) { throw new RuntimeException("上传失败: " + responseCode); } } } /** * 将文件添加到类目中。 * * @param client 客户端对象 * @param leaseId 租约ID * @param parser 用于文件的解析器 * @param categoryId 类目ID * @param workspaceId 业务空间ID * @return 阿里云百炼服务的响应对象 */ public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser, String categoryId, String workspaceId) throws Exception { Map headers = new HashMap<>(); com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest(); addFileRequest.setLeaseId(leaseId); addFileRequest.setParser(parser); addFileRequest.setCategoryId(categoryId); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime); } /** * 查询文件的基本信息。 * * @param client 客户端对象 * @param workspaceId 业务空间ID * @param fileId 文件ID * @return 阿里云百炼服务的响应对象 */ public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception { Map headers = new HashMap<>(); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); return client.describeFileWithOptions(workspaceId, fileId, headers, runtime); } /** * 在阿里云百炼服务中创建知识库(初始化)。 * * @param client 客户端对象 * @param workspaceId 业务空间ID * @param fileId 文件ID * @param name 知识库名称 * @param structureType 知识库的数据类型 * @param sourceType 应用数据的数据类型,支持类目类型和文件类型 * @param sinkType 知识库的向量存储类型 * @return 阿里云百炼服务的响应对象 */ public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId, String name, String structureType, String sourceType, String sinkType) throws Exception { Map headers = new HashMap<>(); com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest(); createIndexRequest.setStructureType(structureType); createIndexRequest.setName(name); createIndexRequest.setSourceType(sourceType); createIndexRequest.setSinkType(sinkType); createIndexRequest.setDocumentIds(Collections.singletonList(fileId)); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime); } /** * 向阿里云百炼服务提交索引任务。 * * @param client 客户端对象 * @param workspaceId 业务空间ID * @param indexId 知识库ID * @return 阿里云百炼服务的响应对象 */ public static SubmitIndexJobResponse submitIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception { Map headers = new HashMap<>(); com.aliyun.bailian20231229.models.SubmitIndexJobRequest submitIndexJobRequest = new com.aliyun.bailian20231229.models.SubmitIndexJobRequest(); submitIndexJobRequest.setIndexId(indexId); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); return client.submitIndexJobWithOptions(workspaceId, submitIndexJobRequest, headers, runtime); } /** * 查询索引任务状态。 * * @param client 客户端对象 * @param workspaceId 业务空间ID * @param jobId 任务ID * @param indexId 知识库ID * @return 阿里云百炼服务的响应对象 */ public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client, String workspaceId, String jobId, String indexId) throws Exception { Map headers = new HashMap<>(); com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest(); getIndexJobStatusRequest.setIndexId(indexId); getIndexJobStatusRequest.setJobId(jobId); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); GetIndexJobStatusResponse getIndexJobStatusResponse = null; getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers, runtime); return getIndexJobStatusResponse; } /** * 从指定的文档类知识库中永久删除一个或多个文件 * * @param client 客户端(Client) * @param workspaceId 业务空间ID * @param indexId 知识库ID * @param fileId 文件ID * @return 阿里云百炼服务的响应 */ public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId) throws Exception { Map headers = new HashMap<>(); DeleteIndexDocumentRequest deleteIndexDocumentRequest = new DeleteIndexDocumentRequest(); deleteIndexDocumentRequest.setIndexId(indexId); deleteIndexDocumentRequest.setDocumentIds(Collections.singletonList(fileId)); com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions(); return client.deleteIndexDocumentWithOptions(workspaceId, deleteIndexDocumentRequest, headers, runtime); } /** * 使用阿里云百炼服务创建知识库。 * * @param filePath 文件本地路径 * @param workspaceId 业务空间ID * @param name 知识库名称 * @return 如果成功,返回知识库ID;否则返回 null */ public static String createKnowledgeBase(String filePath, String workspaceId, String name) { // 设置默认值 String categoryId = "default"; String parser = "DASHSCOPE_DOCMIND"; String sourceType = "DATA_CENTER_FILE"; String structureType = "unstructured"; String sinkType = "DEFAULT"; try { // 步骤1:初始化客户端(Client) System.out.println("步骤1:初始化Client"); com.aliyun.bailian20231229.Client client = KnowledgeBaseUtil.createClient(); // 步骤2:准备文件信息 System.out.println("步骤2:准备文件信息"); String fileName = new File(filePath).getName(); String fileMd5 = calculateMD5(filePath); String fileSize = getFileSize(filePath); // 步骤3:申请上传租约 System.out.println("步骤3:向阿里云百炼申请上传租约"); ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize, workspaceId); String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId(); String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl(); Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders(); // 步骤4:上传文件 System.out.println("步骤4:上传文件到阿里云百炼"); // 请自行安装jackson-databind // 将上一步的uploadHeaders转换为Map(Key-Value形式) ObjectMapper mapper = new ObjectMapper(); Map uploadHeadersMap = (Map) mapper.readValue(mapper.writeValueAsString(uploadHeaders), Map.class); uploadFile(uploadUrl, uploadHeadersMap, filePath); // 步骤5:将文件添加到服务器 System.out.println("步骤5:将文件添加到阿里云百炼服务器"); AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId); String fileId = addResponse.getBody().getData().getFileId(); // 步骤6:检查文件状态 System.out.println("步骤6:检查阿里云百炼中的文件状态"); while (true) { DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId); String status = describeResponse.getBody().getData().getStatus(); System.out.println("当前文件状态:" + status); if (status.equals("INIT")) { System.out.println("文件待解析,请稍候..."); } else if (status.equals("PARSING")) { System.out.println("文件解析中,请稍候..."); } else if (status.equals("PARSE_SUCCESS")) { System.out.println("文件解析完成!"); break; } else { System.out.println("未知的文件状态:" + status + ",请联系技术支持。"); return null; } TimeUnit.SECONDS.sleep(5); } // 步骤7:初始化知识库 System.out.println("步骤7:在阿里云百炼中创建知识库"); CreateIndexResponse indexResponse = createIndex(client, workspaceId, fileId, name, structureType, sourceType, sinkType); String indexId = indexResponse.getBody().getData().getId(); // 步骤8:提交索引任务 System.out.println("步骤8:向阿里云百炼提交索引任务"); SubmitIndexJobResponse submitResponse = submitIndex(client, workspaceId, indexId); String jobId = submitResponse.getBody().getData().getId(); // 步骤9:获取索引任务状态 System.out.println("步骤9:获取阿里云百炼索引任务状态"); while (true) { GetIndexJobStatusResponse getStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId); String status = getStatusResponse.getBody().getData().getStatus(); System.out.println("当前索引任务状态:" + status); if (status.equals("COMPLETED")) { break; } TimeUnit.SECONDS.sleep(5); } System.out.println("阿里云百炼知识库创建成功!"); return indexId; } catch (Exception e) { System.out.println("发生错误:" + e.getMessage()); e.printStackTrace(); return null; } } /** * 使用阿里云百炼服务更新知识库 * * @param filePath 文件(更新后的)的实际本地路径 * @param workspaceId 业务空间ID * @param indexId 需要更新的知识库ID * @param oldFileId 需要更新的文件的FileID * @return 如果成功,返回知识库ID;否则返回 null */ public static String updateKnowledgeBase(String filePath, String workspaceId, String indexId, String oldFileId) { // 设置默认值 String categoryId = "default"; String parser = "DASHSCOPE_DOCMIND"; String sourceType = "DATA_CENTER_FILE"; try { // 步骤1:初始化客户端(Client) System.out.println("步骤1:创建Client"); com.aliyun.bailian20231229.Client client = createClient(); // 步骤2:准备文件信息(更新后的文件) System.out.println("步骤2:准备文件信息"); String fileName = Paths.get(filePath).getFileName().toString(); String fileMd5 = calculateMD5(filePath); String fileSize = getFileSize(filePath); // 步骤3:申请上传租约 System.out.println("步骤3:向阿里云百炼申请上传租约"); ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize, workspaceId); String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId(); String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl(); Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders(); // 步骤4:上传文件到临时存储 System.out.println("步骤4:上传文件到临时存储"); // 请自行安装jackson-databind // 将上一步的uploadHeaders转换为Map(Key-Value形式) ObjectMapper mapper = new ObjectMapper(); Map uploadHeadersMap = (Map) mapper.readValue(mapper.writeValueAsString(uploadHeaders), Map.class); uploadFile(uploadUrl, uploadHeadersMap, filePath); // 步骤5:添加文件到类目中 System.out.println("步骤5:添加文件到类目中"); AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId); String fileId = addResponse.getBody().getData().getFileId(); // 步骤6:检查更新后的文件状态 System.out.println("步骤6:检查阿里云百炼中的文件状态"); while (true) { DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId); String status = describeResponse.getBody().getData().getStatus(); System.out.println("当前文件状态:" + status); if ("INIT".equals(status)) { System.out.println("文件待解析,请稍候..."); } else if ("PARSING".equals(status)) { System.out.println("文件解析中,请稍候..."); } else if ("PARSE_SUCCESS".equals(status)) { System.out.println("文件解析完成!"); break; } else { System.out.println("未知的文件状态:" + status + ",请联系技术支持。"); return null; } Thread.sleep(5000); } // 步骤7:提交追加文件任务 System.out.println("步骤7:提交追加文件任务"); SubmitIndexAddDocumentsJobResponse indexAddResponse = submitIndexAddDocumentsJob(client, workspaceId, indexId, fileId, sourceType); String jobId = indexAddResponse.getBody().getData().getId(); // 步骤8:等待追加任务完成 System.out.println("步骤8:等待追加任务完成"); while (true) { GetIndexJobStatusResponse jobStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId); String status = jobStatusResponse.getBody().getData().getStatus(); System.out.println("当前索引任务状态:" + status); if ("COMPLETED".equals(status)) { break; } Thread.sleep(5000); } // 步骤9:删除旧文件 System.out.println("步骤9:删除旧文件"); deleteIndexDocument(client, workspaceId, indexId, oldFileId); System.out.println("阿里云百炼知识库更新成功!"); return indexId; } catch (Exception e) { System.out.println("发生错误:" + e.getMessage()); return null; } } /** * 向一个文档类知识库追加导入已解析的文件 * * @param client 客户端(Client) * @param workspaceId 业务空间ID * @param indexId 知识库ID * @param fileId 文件ID * @param sourceType 数据类型 * @return 阿里云百炼服务的响应 */ public static SubmitIndexAddDocumentsJobResponse submitIndexAddDocumentsJob(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId, String sourceType) throws Exception { Map headers = new HashMap<>(); SubmitIndexAddDocumentsJobRequest submitIndexAddDocumentsJobRequest = new SubmitIndexAddDocumentsJobRequest(); submitIndexAddDocumentsJobRequest.setIndexId(indexId); submitIndexAddDocumentsJobRequest.setDocumentIds(Collections.singletonList(fileId)); submitIndexAddDocumentsJobRequest.setSourceType(sourceType); RuntimeOptions runtime = new RuntimeOptions(); return client.submitIndexAddDocumentsJobWithOptions(workspaceId, submitIndexAddDocumentsJobRequest, headers, runtime); } /** * 主函数。 */ // public static void main(String[] args) { // Scanner scanner = new Scanner(System.in); // // String filePath = "D:\\项目\\大模型\\阿里云百炼\\知识库\\薪资谈判常见100问与答.md"; // // System.out.print("请为您的知识库输入一个名称:"); // String kbName = scanner.nextLine(); // // String workspaceId = WORKSPACE_ID; // String result = createKnowledgeBase(filePath, workspaceId, kbName); // if (result != null) { // System.out.println("知识库ID: " + result); // } // } /** * 主函数。 */ public static void main(String[] args) { String filePath = "D:\\项目\\大模型\\阿里云百炼\\知识库\\薪资谈判常见100问与答.md"; String indexId = "xlmj6e7ix1"; // 即 AddFile 接口返回的 FileId。您也可以在阿里云百炼控制台的应用数据页面,单击文件名称旁的 ID 图标获取。 String oldFileId = "file_5c9f7e4e0f3e4b4ea2bd208a1b4f5e6f_12629554"; String workspaceId = WORKSPACE_ID; String result = updateKnowledgeBase(filePath, workspaceId, indexId, oldFileId); if (result != null) { System.out.println("知识库更新成功,返回知识库ID: " + result); } else { System.out.println("知识库更新失败。"); } } }