package cc.mrbird.febs.ai.util;
import cn.hutool.core.util.StrUtil;
import com.aliyun.bailian20231229.models.*;
import com.aliyun.teautil.models.RuntimeOptions;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.io.FileInputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public class KnowledgeBaseUtil {
public static final String ACCESS_KEY_ID = "LTAI5tCyQRwhZ2eimxCFKbdq";
public static final String ACCESS_KEY_SECRET = "fs1mEwLXg2j9XuKJsFoW8ThQbJFqHl";
public static final String WORKSPACE_ID = "llm-4bcr09yfxlgz0b0t";
public static final String ENDPOINT = "bailian.cn-beijing.aliyuncs.com";
/**
* description :
*
使用凭据初始化账号Client
* @return Client
*/
public static com.aliyun.bailian20231229.Client createClient() throws Exception {
com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
.setAccessKeyId(ACCESS_KEY_ID)
.setAccessKeySecret(ACCESS_KEY_SECRET)
.setEndpoint(ENDPOINT);
return new com.aliyun.bailian20231229.Client(config);
}
/**
* 添加分类
* @param categoryName 分类名称
* @param parentCategoryId 父分类ID
* @return 返回新增分类的ID
* @throws Exception 当API调用失败或其他异常情况时抛出
*/
public static String AddCategory(String categoryName, String parentCategoryId) throws Exception {
if (StrUtil.isBlank(parentCategoryId)){
parentCategoryId = "";
}
// 创建阿里云百炼客户端
com.aliyun.bailian20231229.Client client = KnowledgeBaseUtil.createClient();
// 构造添加分类请求参数
com.aliyun.bailian20231229.models.AddCategoryRequest addCategoryRequest = new com.aliyun.bailian20231229.models.AddCategoryRequest()
.setCategoryName(categoryName)
.setCategoryType("UNSTRUCTURED")
.setParentCategoryId(parentCategoryId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
java.util.Map headers = new java.util.HashMap<>();
// 调用添加分类API接口
AddCategoryResponse addCategoryResponse = client.addCategoryWithOptions(WORKSPACE_ID, addCategoryRequest, headers, runtime);
return addCategoryResponse.getBody().getData().getCategoryId();
}
/**
* 计算文件的MD5值。
*
* @param filePath 文件本地路径
* @return 文件的MD5值
* @throws Exception 如果计算过程中发生错误
*/
public static String calculateMD5(String filePath) throws Exception {
MessageDigest md = MessageDigest.getInstance("MD5");
try (FileInputStream fis = new FileInputStream(filePath)) {
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
md.update(buffer, 0, bytesRead);
}
}
StringBuilder sb = new StringBuilder();
for (byte b : md.digest()) {
sb.append(String.format("%02x", b & 0xff));
}
return sb.toString();
}
/**
* 获取文件大小(以字节为单位)。
*
* @param filePath 文件本地路径
* @return 文件大小(以字节为单位)
*/
public static String getFileSize(String filePath) {
File file = new File(filePath);
long fileSize = file.length();
return String.valueOf(fileSize);
}
/**
* 申请文件上传租约。
*
* @param client 客户端对象
* @param categoryId 类目ID
* @param fileName 文件名称
* @param fileMd5 文件的MD5值
* @param fileSize 文件大小(以字节为单位)
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象
*/
public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId, String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
Map headers = new HashMap<>();
com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
applyFileUploadLeaseRequest.setFileName(fileName);
applyFileUploadLeaseRequest.setMd5(fileMd5);
applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId, applyFileUploadLeaseRequest, headers, runtime);
return applyFileUploadLeaseResponse;
}
/**
* 上传文件到临时存储。
*
* @param preSignedUrl 上传租约中的 URL
* @param headers 上传请求的头部
* @param filePath 文件本地路径
* @throws Exception 如果上传过程中发生错误
*/
public static void uploadFile(String preSignedUrl, Map headers, String filePath) throws Exception {
File file = new File(filePath);
if (!file.exists() || !file.isFile()) {
throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
}
try (FileInputStream fis = new FileInputStream(file)) {
URL url = new URL(preSignedUrl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("PUT");
conn.setDoOutput(true);
// 设置上传请求头
conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
// 分块读取并上传文件
byte[] buffer = new byte[4096];
int bytesRead;
while ((bytesRead = fis.read(buffer)) != -1) {
conn.getOutputStream().write(buffer, 0, bytesRead);
}
int responseCode = conn.getResponseCode();
if (responseCode != 200) {
throw new RuntimeException("上传失败: " + responseCode);
}
}
}
/**
* 将文件添加到类目中。
*
* @param client 客户端对象
* @param leaseId 租约ID
* @param parser 用于文件的解析器
* @param categoryId 类目ID
* @param workspaceId 业务空间ID
* @return 阿里云百炼服务的响应对象
*/
public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser, String categoryId, String workspaceId) throws Exception {
Map headers = new HashMap<>();
com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
addFileRequest.setLeaseId(leaseId);
addFileRequest.setParser(parser);
addFileRequest.setCategoryId(categoryId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
}
/**
* 查询文件的基本信息。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param fileId 文件ID
* @return 阿里云百炼服务的响应对象
*/
public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception {
Map headers = new HashMap<>();
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
}
/**
* 在阿里云百炼服务中创建知识库(初始化)。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param fileId 文件ID
* @param name 知识库名称
* @param structureType 知识库的数据类型
* @param sourceType 应用数据的数据类型,支持类目类型和文件类型
* @param sinkType 知识库的向量存储类型
* @return 阿里云百炼服务的响应对象
*/
public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId, String name, String structureType, String sourceType, String sinkType) throws Exception {
Map headers = new HashMap<>();
com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
createIndexRequest.setStructureType(structureType);
createIndexRequest.setName(name);
createIndexRequest.setSourceType(sourceType);
createIndexRequest.setSinkType(sinkType);
createIndexRequest.setDocumentIds(Collections.singletonList(fileId));
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
}
/**
* 向阿里云百炼服务提交索引任务。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应对象
*/
public static SubmitIndexJobResponse submitIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception {
Map headers = new HashMap<>();
com.aliyun.bailian20231229.models.SubmitIndexJobRequest submitIndexJobRequest = new com.aliyun.bailian20231229.models.SubmitIndexJobRequest();
submitIndexJobRequest.setIndexId(indexId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.submitIndexJobWithOptions(workspaceId, submitIndexJobRequest, headers, runtime);
}
/**
* 查询索引任务状态。
*
* @param client 客户端对象
* @param workspaceId 业务空间ID
* @param jobId 任务ID
* @param indexId 知识库ID
* @return 阿里云百炼服务的响应对象
*/
public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client, String workspaceId, String jobId, String indexId) throws Exception {
Map headers = new HashMap<>();
com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
getIndexJobStatusRequest.setIndexId(indexId);
getIndexJobStatusRequest.setJobId(jobId);
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
GetIndexJobStatusResponse getIndexJobStatusResponse = null;
getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers, runtime);
return getIndexJobStatusResponse;
}
/**
* 从指定的文档类知识库中永久删除一个或多个文件
*
* @param client 客户端(Client)
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param fileId 文件ID
* @return 阿里云百炼服务的响应
*/
public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId) throws Exception {
Map headers = new HashMap<>();
DeleteIndexDocumentRequest deleteIndexDocumentRequest = new DeleteIndexDocumentRequest();
deleteIndexDocumentRequest.setIndexId(indexId);
deleteIndexDocumentRequest.setDocumentIds(Collections.singletonList(fileId));
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
return client.deleteIndexDocumentWithOptions(workspaceId, deleteIndexDocumentRequest, headers, runtime);
}
/**
* 使用阿里云百炼服务创建知识库。
*
* @param filePath 文件本地路径
* @param workspaceId 业务空间ID
* @param name 知识库名称
* @return 如果成功,返回知识库ID;否则返回 null
*/
public static String createKnowledgeBase(String filePath, String workspaceId, String name,String categoryId) {
// 设置默认值
if (StrUtil.isBlank(categoryId)){
categoryId = "default";
}
String parser = "DASHSCOPE_DOCMIND";
String sourceType = "DATA_CENTER_FILE";
String structureType = "unstructured";
String sinkType = "DEFAULT";
try {
// 步骤1:初始化客户端(Client)
System.out.println("步骤1:初始化Client");
com.aliyun.bailian20231229.Client client = KnowledgeBaseUtil.createClient();
// 步骤2:准备文件信息
System.out.println("步骤2:准备文件信息");
String fileName = new File(filePath).getName();
String fileMd5 = calculateMD5(filePath);
String fileSize = getFileSize(filePath);
// 步骤3:申请上传租约
System.out.println("步骤3:向阿里云百炼申请上传租约");
ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize, workspaceId);
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
// 步骤4:上传文件
System.out.println("步骤4:上传文件到阿里云百炼");
// 请自行安装jackson-databind
// 将上一步的uploadHeaders转换为Map(Key-Value形式)
ObjectMapper mapper = new ObjectMapper();
Map uploadHeadersMap = (Map) mapper.readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
uploadFile(uploadUrl, uploadHeadersMap, filePath);
// 步骤5:将文件添加到服务器
System.out.println("步骤5:将文件添加到阿里云百炼服务器");
AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
String fileId = addResponse.getBody().getData().getFileId();
// 步骤6:检查文件状态
System.out.println("步骤6:检查阿里云百炼中的文件状态");
while (true) {
DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
String status = describeResponse.getBody().getData().getStatus();
System.out.println("当前文件状态:" + status);
if (status.equals("INIT")) {
System.out.println("文件待解析,请稍候...");
} else if (status.equals("PARSING")) {
System.out.println("文件解析中,请稍候...");
} else if (status.equals("PARSE_SUCCESS")) {
System.out.println("文件解析完成!");
break;
} else {
System.out.println("未知的文件状态:" + status + ",请联系技术支持。");
return null;
}
TimeUnit.SECONDS.sleep(5);
}
// 步骤7:初始化知识库
System.out.println("步骤7:在阿里云百炼中创建知识库");
CreateIndexResponse indexResponse = createIndex(client, workspaceId, fileId, name, structureType, sourceType, sinkType);
String indexId = indexResponse.getBody().getData().getId();
// 步骤8:提交索引任务
System.out.println("步骤8:向阿里云百炼提交索引任务");
SubmitIndexJobResponse submitResponse = submitIndex(client, workspaceId, indexId);
String jobId = submitResponse.getBody().getData().getId();
// 步骤9:获取索引任务状态
System.out.println("步骤9:获取阿里云百炼索引任务状态");
while (true) {
GetIndexJobStatusResponse getStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
String status = getStatusResponse.getBody().getData().getStatus();
System.out.println("当前索引任务状态:" + status);
if (status.equals("COMPLETED")) {
break;
}
TimeUnit.SECONDS.sleep(5);
}
System.out.println("阿里云百炼知识库创建成功!");
return indexId;
} catch (Exception e) {
System.out.println("发生错误:" + e.getMessage());
e.printStackTrace();
return null;
}
}
/**
* 使用阿里云百炼服务更新知识库
*
* @param filePath 文件(更新后的)的实际本地路径
* @param workspaceId 业务空间ID
* @param indexId 需要更新的知识库ID
* @param oldFileId 需要更新的文件的FileID
* @return 如果成功,返回知识库ID;否则返回 null
*/
public static String updateKnowledgeBase(String filePath, String workspaceId, String indexId, String oldFileId, String categoryId) {
// 设置默认值
if (StrUtil.isBlank(categoryId)){
categoryId = "default";
}
String parser = "DASHSCOPE_DOCMIND";
String sourceType = "DATA_CENTER_FILE";
try {
// 步骤1:初始化客户端(Client)
System.out.println("步骤1:创建Client");
com.aliyun.bailian20231229.Client client = createClient();
// 步骤2:准备文件信息(更新后的文件)
System.out.println("步骤2:准备文件信息");
String fileName = Paths.get(filePath).getFileName().toString();
String fileMd5 = calculateMD5(filePath);
String fileSize = getFileSize(filePath);
// 步骤3:申请上传租约
System.out.println("步骤3:向阿里云百炼申请上传租约");
ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize, workspaceId);
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
// 步骤4:上传文件到临时存储
System.out.println("步骤4:上传文件到临时存储");
// 请自行安装jackson-databind
// 将上一步的uploadHeaders转换为Map(Key-Value形式)
ObjectMapper mapper = new ObjectMapper();
Map uploadHeadersMap = (Map) mapper.readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
uploadFile(uploadUrl, uploadHeadersMap, filePath);
// 步骤5:添加文件到类目中
System.out.println("步骤5:添加文件到类目中");
AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
String fileId = addResponse.getBody().getData().getFileId();
// 步骤6:检查更新后的文件状态
System.out.println("步骤6:检查阿里云百炼中的文件状态");
while (true) {
DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
String status = describeResponse.getBody().getData().getStatus();
System.out.println("当前文件状态:" + status);
if ("INIT".equals(status)) {
System.out.println("文件待解析,请稍候...");
} else if ("PARSING".equals(status)) {
System.out.println("文件解析中,请稍候...");
} else if ("PARSE_SUCCESS".equals(status)) {
System.out.println("文件解析完成!");
break;
} else {
System.out.println("未知的文件状态:" + status + ",请联系技术支持。");
return null;
}
Thread.sleep(5000);
}
// 步骤7:提交追加文件任务
System.out.println("步骤7:提交追加文件任务");
SubmitIndexAddDocumentsJobResponse indexAddResponse = submitIndexAddDocumentsJob(client, workspaceId, indexId, fileId, sourceType);
String jobId = indexAddResponse.getBody().getData().getId();
// 步骤8:等待追加任务完成
System.out.println("步骤8:等待追加任务完成");
while (true) {
GetIndexJobStatusResponse jobStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
String status = jobStatusResponse.getBody().getData().getStatus();
System.out.println("当前索引任务状态:" + status);
if ("COMPLETED".equals(status)) {
break;
}
Thread.sleep(5000);
}
// 步骤9:删除旧文件
if(StrUtil.isNotBlank(oldFileId)){
System.out.println("步骤9:删除旧文件");
deleteIndexDocument(client, workspaceId, indexId, oldFileId);
}
System.out.println("阿里云百炼知识库更新成功!");
return indexId;
} catch (Exception e) {
System.out.println("发生错误:" + e.getMessage());
return null;
}
}
/**
* 向一个文档类知识库追加导入已解析的文件
*
* @param client 客户端(Client)
* @param workspaceId 业务空间ID
* @param indexId 知识库ID
* @param fileId 文件ID
* @param sourceType 数据类型
* @return 阿里云百炼服务的响应
*/
public static SubmitIndexAddDocumentsJobResponse submitIndexAddDocumentsJob(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId, String sourceType) throws Exception {
Map headers = new HashMap<>();
SubmitIndexAddDocumentsJobRequest submitIndexAddDocumentsJobRequest = new SubmitIndexAddDocumentsJobRequest();
submitIndexAddDocumentsJobRequest.setIndexId(indexId);
submitIndexAddDocumentsJobRequest.setDocumentIds(Collections.singletonList(fileId));
submitIndexAddDocumentsJobRequest.setSourceType(sourceType);
RuntimeOptions runtime = new RuntimeOptions();
return client.submitIndexAddDocumentsJobWithOptions(workspaceId, submitIndexAddDocumentsJobRequest, headers, runtime);
}
/**
* 主函数。
*/
// public static void main(String[] args) {
// Scanner scanner = new Scanner(System.in);
//
// String filePath = "D:\\项目\\大模型\\阿里云百炼\\知识库\\薪资谈判常见100问与答.md";
//
// System.out.print("请为您的知识库输入一个名称:");
// String kbName = scanner.nextLine();
//
// String workspaceId = WORKSPACE_ID;
// String result = createKnowledgeBase(filePath, workspaceId, kbName);
// if (result != null) {
// System.out.println("知识库ID: " + result);
// }
// }
/**
* 主函数。
*/
public static void main(String[] args) {
String filePath = "D:\\项目\\大模型\\阿里云百炼\\知识库\\薪资谈判常见100问与答.md";
String indexId = "xlmj6e7ix1";
String oldFileId = "file_e943bb6d305a49a5acb7781ca00d70dd_12629554";
String workspaceId = WORKSPACE_ID;
String result = updateKnowledgeBase(filePath, workspaceId, indexId, oldFileId,null);
if (result != null) {
System.out.println("知识库更新成功,返回知识库ID: " + result);
} else {
System.out.println("知识库更新失败。");
}
}
}