package cc.mrbird.febs.ai.utils;
|
|
import com.aliyun.bailian20231229.models.*;
|
import com.aliyun.teautil.models.RuntimeOptions;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
import java.io.File;
|
import java.io.FileInputStream;
|
import java.net.HttpURLConnection;
|
import java.net.URL;
|
import java.nio.file.Paths;
|
import java.security.MessageDigest;
|
import java.util.Collections;
|
import java.util.HashMap;
|
import java.util.Map;
|
import java.util.concurrent.TimeUnit;
|
|
public class KnowledgeBaseUtil {
|
private static String ACCESS_KEY_ID = "LTAI5tCyQRwhZ2eimxCFKbdq";
|
private static String ACCESS_KEY_SECRET = "fs1mEwLXg2j9XuKJsFoW8ThQbJFqHl";
|
private static String WORKSPACE_ID = "llm-4bcr09yfxlgz0b0t";
|
private static String ENDPOINT = "bailian.cn-beijing.aliyuncs.com";
|
|
/**
|
* <b>description</b> :
|
* <p>使用凭据初始化账号Client</p>
|
* @return Client
|
*/
|
public static com.aliyun.bailian20231229.Client createClient() throws Exception {
|
com.aliyun.teaopenapi.models.Config config = new com.aliyun.teaopenapi.models.Config()
|
.setAccessKeyId(ACCESS_KEY_ID)
|
.setAccessKeySecret(ACCESS_KEY_SECRET)
|
.setEndpoint(ENDPOINT);
|
return new com.aliyun.bailian20231229.Client(config);
|
}
|
|
|
/**
|
* 添加分类
|
* @param categoryName 分类名称
|
* @param parentCategoryId 父分类ID
|
* @return 返回新增分类的ID
|
* @throws Exception 当API调用失败或其他异常情况时抛出
|
*/
|
public static String AddCategory(String categoryName, String parentCategoryId) throws Exception {
|
// 创建阿里云百炼客户端
|
com.aliyun.bailian20231229.Client client = KnowledgeBaseUtil.createClient();
|
// 构造添加分类请求参数
|
com.aliyun.bailian20231229.models.AddCategoryRequest addCategoryRequest = new com.aliyun.bailian20231229.models.AddCategoryRequest()
|
.setCategoryName(categoryName)
|
.setCategoryType("UNSTRUCTURED")
|
.setParentCategoryId(parentCategoryId);
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
java.util.Map<String, String> headers = new java.util.HashMap<>();
|
// 调用添加分类API接口
|
AddCategoryResponse addCategoryResponse = client.addCategoryWithOptions(WORKSPACE_ID, addCategoryRequest, headers, runtime);
|
return addCategoryResponse.getBody().getData().getCategoryId();
|
}
|
|
/**
|
* 计算文件的MD5值。
|
*
|
* @param filePath 文件本地路径
|
* @return 文件的MD5值
|
* @throws Exception 如果计算过程中发生错误
|
*/
|
public static String calculateMD5(String filePath) throws Exception {
|
MessageDigest md = MessageDigest.getInstance("MD5");
|
try (FileInputStream fis = new FileInputStream(filePath)) {
|
byte[] buffer = new byte[4096];
|
int bytesRead;
|
while ((bytesRead = fis.read(buffer)) != -1) {
|
md.update(buffer, 0, bytesRead);
|
}
|
}
|
StringBuilder sb = new StringBuilder();
|
for (byte b : md.digest()) {
|
sb.append(String.format("%02x", b & 0xff));
|
}
|
return sb.toString();
|
}
|
|
/**
|
* 获取文件大小(以字节为单位)。
|
*
|
* @param filePath 文件本地路径
|
* @return 文件大小(以字节为单位)
|
*/
|
public static String getFileSize(String filePath) {
|
File file = new File(filePath);
|
long fileSize = file.length();
|
return String.valueOf(fileSize);
|
}
|
|
/**
|
* 申请文件上传租约。
|
*
|
* @param client 客户端对象
|
* @param categoryId 类目ID
|
* @param fileName 文件名称
|
* @param fileMd5 文件的MD5值
|
* @param fileSize 文件大小(以字节为单位)
|
* @param workspaceId 业务空间ID
|
* @return 阿里云百炼服务的响应对象
|
*/
|
public static ApplyFileUploadLeaseResponse applyLease(com.aliyun.bailian20231229.Client client, String categoryId, String fileName, String fileMd5, String fileSize, String workspaceId) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest applyFileUploadLeaseRequest = new com.aliyun.bailian20231229.models.ApplyFileUploadLeaseRequest();
|
applyFileUploadLeaseRequest.setFileName(fileName);
|
applyFileUploadLeaseRequest.setMd5(fileMd5);
|
applyFileUploadLeaseRequest.setSizeInBytes(fileSize);
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
ApplyFileUploadLeaseResponse applyFileUploadLeaseResponse = null;
|
applyFileUploadLeaseResponse = client.applyFileUploadLeaseWithOptions(categoryId, workspaceId, applyFileUploadLeaseRequest, headers, runtime);
|
return applyFileUploadLeaseResponse;
|
}
|
|
/**
|
* 上传文件到临时存储。
|
*
|
* @param preSignedUrl 上传租约中的 URL
|
* @param headers 上传请求的头部
|
* @param filePath 文件本地路径
|
* @throws Exception 如果上传过程中发生错误
|
*/
|
public static void uploadFile(String preSignedUrl, Map<String, String> headers, String filePath) throws Exception {
|
File file = new File(filePath);
|
if (!file.exists() || !file.isFile()) {
|
throw new IllegalArgumentException("文件不存在或不是普通文件: " + filePath);
|
}
|
|
try (FileInputStream fis = new FileInputStream(file)) {
|
URL url = new URL(preSignedUrl);
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
conn.setRequestMethod("PUT");
|
conn.setDoOutput(true);
|
|
// 设置上传请求头
|
conn.setRequestProperty("X-bailian-extra", headers.get("X-bailian-extra"));
|
conn.setRequestProperty("Content-Type", headers.get("Content-Type"));
|
|
// 分块读取并上传文件
|
byte[] buffer = new byte[4096];
|
int bytesRead;
|
while ((bytesRead = fis.read(buffer)) != -1) {
|
conn.getOutputStream().write(buffer, 0, bytesRead);
|
}
|
|
int responseCode = conn.getResponseCode();
|
if (responseCode != 200) {
|
throw new RuntimeException("上传失败: " + responseCode);
|
}
|
}
|
}
|
|
/**
|
* 将文件添加到类目中。
|
*
|
* @param client 客户端对象
|
* @param leaseId 租约ID
|
* @param parser 用于文件的解析器
|
* @param categoryId 类目ID
|
* @param workspaceId 业务空间ID
|
* @return 阿里云百炼服务的响应对象
|
*/
|
public static AddFileResponse addFile(com.aliyun.bailian20231229.Client client, String leaseId, String parser, String categoryId, String workspaceId) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
com.aliyun.bailian20231229.models.AddFileRequest addFileRequest = new com.aliyun.bailian20231229.models.AddFileRequest();
|
addFileRequest.setLeaseId(leaseId);
|
addFileRequest.setParser(parser);
|
addFileRequest.setCategoryId(categoryId);
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
return client.addFileWithOptions(workspaceId, addFileRequest, headers, runtime);
|
}
|
|
/**
|
* 查询文件的基本信息。
|
*
|
* @param client 客户端对象
|
* @param workspaceId 业务空间ID
|
* @param fileId 文件ID
|
* @return 阿里云百炼服务的响应对象
|
*/
|
public static DescribeFileResponse describeFile(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
return client.describeFileWithOptions(workspaceId, fileId, headers, runtime);
|
}
|
|
/**
|
* 在阿里云百炼服务中创建知识库(初始化)。
|
*
|
* @param client 客户端对象
|
* @param workspaceId 业务空间ID
|
* @param fileId 文件ID
|
* @param name 知识库名称
|
* @param structureType 知识库的数据类型
|
* @param sourceType 应用数据的数据类型,支持类目类型和文件类型
|
* @param sinkType 知识库的向量存储类型
|
* @return 阿里云百炼服务的响应对象
|
*/
|
public static CreateIndexResponse createIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String fileId, String name, String structureType, String sourceType, String sinkType) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
com.aliyun.bailian20231229.models.CreateIndexRequest createIndexRequest = new com.aliyun.bailian20231229.models.CreateIndexRequest();
|
createIndexRequest.setStructureType(structureType);
|
createIndexRequest.setName(name);
|
createIndexRequest.setSourceType(sourceType);
|
createIndexRequest.setSinkType(sinkType);
|
createIndexRequest.setDocumentIds(Collections.singletonList(fileId));
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
return client.createIndexWithOptions(workspaceId, createIndexRequest, headers, runtime);
|
}
|
|
/**
|
* 向阿里云百炼服务提交索引任务。
|
*
|
* @param client 客户端对象
|
* @param workspaceId 业务空间ID
|
* @param indexId 知识库ID
|
* @return 阿里云百炼服务的响应对象
|
*/
|
public static SubmitIndexJobResponse submitIndex(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
com.aliyun.bailian20231229.models.SubmitIndexJobRequest submitIndexJobRequest = new com.aliyun.bailian20231229.models.SubmitIndexJobRequest();
|
submitIndexJobRequest.setIndexId(indexId);
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
return client.submitIndexJobWithOptions(workspaceId, submitIndexJobRequest, headers, runtime);
|
}
|
|
/**
|
* 查询索引任务状态。
|
*
|
* @param client 客户端对象
|
* @param workspaceId 业务空间ID
|
* @param jobId 任务ID
|
* @param indexId 知识库ID
|
* @return 阿里云百炼服务的响应对象
|
*/
|
public static GetIndexJobStatusResponse getIndexJobStatus(com.aliyun.bailian20231229.Client client, String workspaceId, String jobId, String indexId) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
com.aliyun.bailian20231229.models.GetIndexJobStatusRequest getIndexJobStatusRequest = new com.aliyun.bailian20231229.models.GetIndexJobStatusRequest();
|
getIndexJobStatusRequest.setIndexId(indexId);
|
getIndexJobStatusRequest.setJobId(jobId);
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
GetIndexJobStatusResponse getIndexJobStatusResponse = null;
|
getIndexJobStatusResponse = client.getIndexJobStatusWithOptions(workspaceId, getIndexJobStatusRequest, headers, runtime);
|
return getIndexJobStatusResponse;
|
}
|
|
/**
|
* 从指定的文档类知识库中永久删除一个或多个文件
|
*
|
* @param client 客户端(Client)
|
* @param workspaceId 业务空间ID
|
* @param indexId 知识库ID
|
* @param fileId 文件ID
|
* @return 阿里云百炼服务的响应
|
*/
|
public static DeleteIndexDocumentResponse deleteIndexDocument(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
DeleteIndexDocumentRequest deleteIndexDocumentRequest = new DeleteIndexDocumentRequest();
|
deleteIndexDocumentRequest.setIndexId(indexId);
|
deleteIndexDocumentRequest.setDocumentIds(Collections.singletonList(fileId));
|
com.aliyun.teautil.models.RuntimeOptions runtime = new com.aliyun.teautil.models.RuntimeOptions();
|
return client.deleteIndexDocumentWithOptions(workspaceId, deleteIndexDocumentRequest, headers, runtime);
|
}
|
|
/**
|
* 使用阿里云百炼服务创建知识库。
|
*
|
* @param filePath 文件本地路径
|
* @param workspaceId 业务空间ID
|
* @param name 知识库名称
|
* @return 如果成功,返回知识库ID;否则返回 null
|
*/
|
public static String createKnowledgeBase(String filePath, String workspaceId, String name) {
|
// 设置默认值
|
String categoryId = "default";
|
String parser = "DASHSCOPE_DOCMIND";
|
String sourceType = "DATA_CENTER_FILE";
|
String structureType = "unstructured";
|
String sinkType = "DEFAULT";
|
try {
|
// 步骤1:初始化客户端(Client)
|
System.out.println("步骤1:初始化Client");
|
com.aliyun.bailian20231229.Client client = KnowledgeBaseUtil.createClient();
|
|
// 步骤2:准备文件信息
|
System.out.println("步骤2:准备文件信息");
|
String fileName = new File(filePath).getName();
|
String fileMd5 = calculateMD5(filePath);
|
String fileSize = getFileSize(filePath);
|
|
// 步骤3:申请上传租约
|
System.out.println("步骤3:向阿里云百炼申请上传租约");
|
ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize, workspaceId);
|
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
|
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
|
Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
|
|
// 步骤4:上传文件
|
System.out.println("步骤4:上传文件到阿里云百炼");
|
// 请自行安装jackson-databind
|
// 将上一步的uploadHeaders转换为Map(Key-Value形式)
|
ObjectMapper mapper = new ObjectMapper();
|
Map<String, String> uploadHeadersMap = (Map<String, String>) mapper.readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
|
uploadFile(uploadUrl, uploadHeadersMap, filePath);
|
|
// 步骤5:将文件添加到服务器
|
System.out.println("步骤5:将文件添加到阿里云百炼服务器");
|
AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
|
String fileId = addResponse.getBody().getData().getFileId();
|
|
// 步骤6:检查文件状态
|
System.out.println("步骤6:检查阿里云百炼中的文件状态");
|
while (true) {
|
DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
|
String status = describeResponse.getBody().getData().getStatus();
|
System.out.println("当前文件状态:" + status);
|
|
if (status.equals("INIT")) {
|
System.out.println("文件待解析,请稍候...");
|
} else if (status.equals("PARSING")) {
|
System.out.println("文件解析中,请稍候...");
|
} else if (status.equals("PARSE_SUCCESS")) {
|
System.out.println("文件解析完成!");
|
break;
|
} else {
|
System.out.println("未知的文件状态:" + status + ",请联系技术支持。");
|
return null;
|
}
|
TimeUnit.SECONDS.sleep(5);
|
}
|
|
// 步骤7:初始化知识库
|
System.out.println("步骤7:在阿里云百炼中创建知识库");
|
CreateIndexResponse indexResponse = createIndex(client, workspaceId, fileId, name, structureType, sourceType, sinkType);
|
String indexId = indexResponse.getBody().getData().getId();
|
|
// 步骤8:提交索引任务
|
System.out.println("步骤8:向阿里云百炼提交索引任务");
|
SubmitIndexJobResponse submitResponse = submitIndex(client, workspaceId, indexId);
|
String jobId = submitResponse.getBody().getData().getId();
|
|
// 步骤9:获取索引任务状态
|
System.out.println("步骤9:获取阿里云百炼索引任务状态");
|
while (true) {
|
GetIndexJobStatusResponse getStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
|
String status = getStatusResponse.getBody().getData().getStatus();
|
System.out.println("当前索引任务状态:" + status);
|
|
if (status.equals("COMPLETED")) {
|
break;
|
}
|
TimeUnit.SECONDS.sleep(5);
|
}
|
|
System.out.println("阿里云百炼知识库创建成功!");
|
return indexId;
|
|
} catch (Exception e) {
|
System.out.println("发生错误:" + e.getMessage());
|
e.printStackTrace();
|
return null;
|
}
|
}
|
|
|
|
/**
|
* 使用阿里云百炼服务更新知识库
|
*
|
* @param filePath 文件(更新后的)的实际本地路径
|
* @param workspaceId 业务空间ID
|
* @param indexId 需要更新的知识库ID
|
* @param oldFileId 需要更新的文件的FileID
|
* @return 如果成功,返回知识库ID;否则返回 null
|
*/
|
public static String updateKnowledgeBase(String filePath, String workspaceId, String indexId, String oldFileId) {
|
// 设置默认值
|
String categoryId = "default";
|
String parser = "DASHSCOPE_DOCMIND";
|
String sourceType = "DATA_CENTER_FILE";
|
try {
|
// 步骤1:初始化客户端(Client)
|
System.out.println("步骤1:创建Client");
|
com.aliyun.bailian20231229.Client client = createClient();
|
|
// 步骤2:准备文件信息(更新后的文件)
|
System.out.println("步骤2:准备文件信息");
|
String fileName = Paths.get(filePath).getFileName().toString();
|
String fileMd5 = calculateMD5(filePath);
|
String fileSize = getFileSize(filePath);
|
|
// 步骤3:申请上传租约
|
System.out.println("步骤3:向阿里云百炼申请上传租约");
|
ApplyFileUploadLeaseResponse leaseResponse = applyLease(client, categoryId, fileName, fileMd5, fileSize, workspaceId);
|
String leaseId = leaseResponse.getBody().getData().getFileUploadLeaseId();
|
String uploadUrl = leaseResponse.getBody().getData().getParam().getUrl();
|
Object uploadHeaders = leaseResponse.getBody().getData().getParam().getHeaders();
|
|
// 步骤4:上传文件到临时存储
|
System.out.println("步骤4:上传文件到临时存储");
|
// 请自行安装jackson-databind
|
// 将上一步的uploadHeaders转换为Map(Key-Value形式)
|
ObjectMapper mapper = new ObjectMapper();
|
Map<String, String> uploadHeadersMap = (Map<String, String>) mapper.readValue(mapper.writeValueAsString(uploadHeaders), Map.class);
|
uploadFile(uploadUrl, uploadHeadersMap, filePath);
|
|
// 步骤5:添加文件到类目中
|
System.out.println("步骤5:添加文件到类目中");
|
AddFileResponse addResponse = addFile(client, leaseId, parser, categoryId, workspaceId);
|
String fileId = addResponse.getBody().getData().getFileId();
|
|
// 步骤6:检查更新后的文件状态
|
System.out.println("步骤6:检查阿里云百炼中的文件状态");
|
while (true) {
|
DescribeFileResponse describeResponse = describeFile(client, workspaceId, fileId);
|
String status = describeResponse.getBody().getData().getStatus();
|
System.out.println("当前文件状态:" + status);
|
if ("INIT".equals(status)) {
|
System.out.println("文件待解析,请稍候...");
|
} else if ("PARSING".equals(status)) {
|
System.out.println("文件解析中,请稍候...");
|
} else if ("PARSE_SUCCESS".equals(status)) {
|
System.out.println("文件解析完成!");
|
break;
|
} else {
|
System.out.println("未知的文件状态:" + status + ",请联系技术支持。");
|
return null;
|
}
|
Thread.sleep(5000);
|
}
|
|
// 步骤7:提交追加文件任务
|
System.out.println("步骤7:提交追加文件任务");
|
SubmitIndexAddDocumentsJobResponse indexAddResponse = submitIndexAddDocumentsJob(client, workspaceId, indexId, fileId, sourceType);
|
String jobId = indexAddResponse.getBody().getData().getId();
|
|
// 步骤8:等待追加任务完成
|
System.out.println("步骤8:等待追加任务完成");
|
while (true) {
|
GetIndexJobStatusResponse jobStatusResponse = getIndexJobStatus(client, workspaceId, jobId, indexId);
|
String status = jobStatusResponse.getBody().getData().getStatus();
|
System.out.println("当前索引任务状态:" + status);
|
if ("COMPLETED".equals(status)) {
|
break;
|
}
|
Thread.sleep(5000);
|
}
|
|
// 步骤9:删除旧文件
|
System.out.println("步骤9:删除旧文件");
|
deleteIndexDocument(client, workspaceId, indexId, oldFileId);
|
|
System.out.println("阿里云百炼知识库更新成功!");
|
return indexId;
|
} catch (Exception e) {
|
System.out.println("发生错误:" + e.getMessage());
|
return null;
|
}
|
}
|
|
/**
|
* 向一个文档类知识库追加导入已解析的文件
|
*
|
* @param client 客户端(Client)
|
* @param workspaceId 业务空间ID
|
* @param indexId 知识库ID
|
* @param fileId 文件ID
|
* @param sourceType 数据类型
|
* @return 阿里云百炼服务的响应
|
*/
|
public static SubmitIndexAddDocumentsJobResponse submitIndexAddDocumentsJob(com.aliyun.bailian20231229.Client client, String workspaceId, String indexId, String fileId, String sourceType) throws Exception {
|
Map<String, String> headers = new HashMap<>();
|
SubmitIndexAddDocumentsJobRequest submitIndexAddDocumentsJobRequest = new SubmitIndexAddDocumentsJobRequest();
|
submitIndexAddDocumentsJobRequest.setIndexId(indexId);
|
submitIndexAddDocumentsJobRequest.setDocumentIds(Collections.singletonList(fileId));
|
submitIndexAddDocumentsJobRequest.setSourceType(sourceType);
|
RuntimeOptions runtime = new RuntimeOptions();
|
return client.submitIndexAddDocumentsJobWithOptions(workspaceId, submitIndexAddDocumentsJobRequest, headers, runtime);
|
}
|
|
|
|
/*
 * Example entry point for creating a knowledge base (disabled; kept for reference).
 */
|
// public static void main(String[] args) {
|
// Scanner scanner = new Scanner(System.in);
|
//
|
// String filePath = "D:\\项目\\大模型\\阿里云百炼\\知识库\\薪资谈判常见100问与答.md";
|
//
|
// System.out.print("请为您的知识库输入一个名称:");
|
// String kbName = scanner.nextLine();
|
//
|
// String workspaceId = WORKSPACE_ID;
|
// String result = createKnowledgeBase(filePath, workspaceId, kbName);
|
// if (result != null) {
|
// System.out.println("知识库ID: " + result);
|
// }
|
// }
|
|
/**
|
* 主函数。
|
*/
|
public static void main(String[] args) {
|
|
String filePath = "D:\\项目\\大模型\\阿里云百炼\\知识库\\薪资谈判常见100问与答.md";
|
String indexId = "xlmj6e7ix1"; // 即 AddFile 接口返回的 FileId。您也可以在阿里云百炼控制台的应用数据页面,单击文件名称旁的 ID 图标获取。
|
String oldFileId = "file_5c9f7e4e0f3e4b4ea2bd208a1b4f5e6f_12629554";
|
|
String workspaceId = WORKSPACE_ID;
|
String result = updateKnowledgeBase(filePath, workspaceId, indexId, oldFileId);
|
if (result != null) {
|
System.out.println("知识库更新成功,返回知识库ID: " + result);
|
} else {
|
System.out.println("知识库更新失败。");
|
}
|
}
|
|
|
}
|