优化 字幕优化流程

This commit is contained in:
小肥羊 2025-12-23 18:15:35 +08:00
parent fddcdbc1d9
commit ce5f549407
1 changed files with 30 additions and 23 deletions

View File

@ -31,7 +31,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
public class GTP_Analysis_1 : IBserGPTWorkflow
{
private readonly GeminiGPTClient geminiClient;
private readonly DeepSeekGPTClient chatClient;
private readonly DeepSeekGPTClient deepSeekClient;
private readonly BestAIClient chatGPTClient;
private readonly Repository<CourseGradingCriteria> criteriaDB;
private readonly RedisManager redisManager;
@ -51,7 +51,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
Repository<KnowledgeInfo> knowledgeInfoDB, Repository<VideoKonwPoint> videoKonwPointDB, SimpLetexClient simpLetexClient,
Repository<VideoQuestion> videoQuestionDB, OssClient ossClient, Repository<VideoQuestionKonw> videoQuestionKonwDB, RedisManager redisManager, BestAIClient chatGPTClient, GeminiGPTClient geminiClient)
{
chatClient = moonshotClient;
deepSeekClient = moonshotClient;
criteriaDB = criteria;
this.videoTaskDB = videoTaskDB;
this.knowledgeInfoDB = knowledgeInfoDB;
@ -90,7 +90,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
$"输出内容只返回json格式({checkResFormat1})" +
$" 格式 (方法点Id|方法点名称) " +
$"提供的知识点名称({knows})。";
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + "=>2.开始分析视频内容知识点");
await redisManager.AddTaskLog(taskInfo.Id, "=>2.开始分析视频内容知识点");
VideoKnowRes[] konwRes;
var knowOK = false;
for (int i = 0; i < 4; i++)
@ -190,7 +190,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var pptFormat = taskInfo.VideoType == AttachmentsInfoType.
? "这堂课是习题课,所讲解内容几乎都是试题。"
: string.Empty;
var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}""";
var checkResFormat = """{"Score":打分(number),"Evaluation":评价以及扣分原因(string)""";//,"Data":优化后的分段(array)}""";
var checkMessage =
$"""
@ -226,11 +226,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
return JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.CaptionsAI);
var subject = taskInfo.Subject.ToString();
var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length);
var spanCount = 50;
var spanCount = 75;
var totalCount = captionsArr.Length / spanCount + 1;
await Parallel.ForAsync(0, totalCount,
new ParallelOptions() { MaxDegreeOfParallelism = 20 },
new ParallelOptions() { MaxDegreeOfParallelism =15 },
async (s, c) =>
{
var cArr = captionsArr
@ -242,17 +242,20 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var nowCaptionStr = cStrArr.ToJson();
var resFormat = """[string(修改结果)]""";
var postMessages =
$"这是一堂中国{subject}课堂的字幕,由结果是语音识别提供。" +
$"字幕内容与{subject}学科{sections}章节相关。" +
$"但是识别出来的字幕包含错误的关键字 例如应当是'数列'被识别为'树列'。" +
$"所以需要你帮我修复其中的错别字,修复公式排版。" +
$"请注意,只允许对字幕进行修改。" +
$"输出内容只返回json格式({resFormat})" +
$"字幕内容(JSON字符串)" +
$"`{nowCaptionStr}`" +
$"字幕结束。" +
$"最后请确保输出字幕条数与输入字幕条数一致!!! 如果不一致则重新优化并且确保字幕条数一致!!!!";
var resData = await chatClient.ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕", "deepseek-chat", 3000);
$"角色设定:你是一位专业的中国{subject}学科专家,负责校对关于{sections}内容的课堂教学字幕。\n\n" +
$"任务描述:\n" +
$"请根据上下文逻辑对输入的语音识别STT字幕进行深度优化。具体要求如下\n" +
$"1. 逻辑纠错:结合{subject}学科背景,利用前后文语义修正所有错误词汇。不仅要修正同音错别词(如:树列改为数列),还要修正因识别模糊导致的语义断裂或学科术语错误。\n" +
$"2. 断句与标点:优化字幕的标点符号,并根据老师说话的语感和学科逻辑重新调整断句位置。确保每一条字幕在学术表达上自然、通顺,修复由于语音停顿造成的断句不当。\n" +
$"3. 公式规范:将字幕中提到的数学或科学公式统一转化为规范的 LaTeX 格式。\n\n" +
$"强制约束:\n" +
$"- 数量对齐输出的字幕条数Array Length必须与输入的字幕条数完全一致严禁合并、拆分或删除任何条目。\n" +
$"- 纯净返回:只允许返回 JSON 格式的字符串,严禁包含任何前言、后缀或解释性文字。\n" +
$"- 数据格式JSON 结构必须严格符合:{resFormat}\n\n" +
$"待优化字幕内容:\n" +
$"{nowCaptionStr}\n\n" +
$"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。";
var resData = await deepSeekClient.ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕");
if (resData.Count() != cArr.Count())
{
resData = cStrArr.ToArray();
@ -335,7 +338,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
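/// <returns>Returns null when taskInfo.PPTKeyFrame is null or empty; the remaining return paths are outside this view (TODO confirm).</returns>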
private async Task<SenseVoiceRes[]> AnalysisVideoQuestions(VideoTask taskInfo, List<KnowledgeInfo> knowledgeInfos)
{
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + $"=>{taskInfo.Id} 提取试题");
await redisManager.AddTaskLog(taskInfo.Id, $"=>{taskInfo.Id} 提取试题");
if (taskInfo is null || string.IsNullOrEmpty(taskInfo.PPTKeyFrame))
return null;
var farmeArr = JsonSerializer.Deserialize<int[]>(taskInfo.PPTKeyFrame);
@ -376,7 +379,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
break;
#if DEBUG
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题的试题内容");
await redisManager.AddTaskLog(taskInfo.Id, $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题的试题内容");
#endif
//var knowArr=JsonSerializer.Serialize(knowInfoArr.Select(s => new { s.KnowPointId, s.KnowPoint }));
@ -394,7 +397,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
$"输出内容只返回json格式为({resFormat})" +
$"以下是试题内容" +
$"`{sRes.Result.res.value}`";
var resData = await chatClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
var resData = await deepSeekClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
//var resData = await chatClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
if (resData is null || resData.Count() == 0)
break;
@ -435,7 +438,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
}
catch (Exception ex)
{
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题出现错误 {ex.Message}");
await redisManager.AddTaskLog(taskInfo.Id, $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题出现错误 {ex.Message}");
}
}
}
@ -520,6 +523,9 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);//ChatGPT
//校验结果质量
var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id);
await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果 得分=>{checkRes.Score} ");
await redisManager.AddTaskLog(taskInfo.Id, checkRes.Evaluation);
if (checkRes != null && checkRes.Score >= 80)
{
@ -527,10 +533,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
await videoKonwPointDB.InsertRangeAsync(insertData);
break;
}
}else
await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果不合格!即将重试 剩余次数{tryCount}");
if (questionRes.Any(s => s.KeepTime < 30))
{
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + "=>视频分段过短!! 重新进行AI分析");
await redisManager.AddTaskLog(taskInfo.Id, "=>视频分段过短!! 重新进行AI分析");
continue;
}
}