优化 字幕优化流程
This commit is contained in:
parent
fddcdbc1d9
commit
ce5f549407
|
|
@ -31,7 +31,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
public class GTP_Analysis_1 : IBserGPTWorkflow
|
||||
{
|
||||
private readonly GeminiGPTClient geminiClient;
|
||||
private readonly DeepSeekGPTClient chatClient;
|
||||
private readonly DeepSeekGPTClient deepSeekClient;
|
||||
private readonly BestAIClient chatGPTClient;
|
||||
private readonly Repository<CourseGradingCriteria> criteriaDB;
|
||||
private readonly RedisManager redisManager;
|
||||
|
|
@ -51,7 +51,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
Repository<KnowledgeInfo> knowledgeInfoDB, Repository<VideoKonwPoint> videoKonwPointDB, SimpLetexClient simpLetexClient,
|
||||
Repository<VideoQuestion> videoQuestionDB, OssClient ossClient, Repository<VideoQuestionKonw> videoQuestionKonwDB, RedisManager redisManager, BestAIClient chatGPTClient, GeminiGPTClient geminiClient)
|
||||
{
|
||||
chatClient = moonshotClient;
|
||||
deepSeekClient = moonshotClient;
|
||||
criteriaDB = criteria;
|
||||
this.videoTaskDB = videoTaskDB;
|
||||
this.knowledgeInfoDB = knowledgeInfoDB;
|
||||
|
|
@ -90,7 +90,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
$"输出内容只返回json格式({checkResFormat1})" +
|
||||
$" 格式 (方法点Id|方法点名称) " +
|
||||
$"提供的知识点名称({knows})。";
|
||||
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + "=>2.开始分析视频内容知识点");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, "=>2.开始分析视频内容知识点");
|
||||
VideoKnowRes[] konwRes;
|
||||
var knowOK = false;
|
||||
for (int i = 0; i < 4; i++)
|
||||
|
|
@ -190,7 +190,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
var pptFormat = taskInfo.VideoType == AttachmentsInfoType.复习
|
||||
? "这堂课是习题课,所讲解内容几乎都是试题。"
|
||||
: string.Empty;
|
||||
var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}""";
|
||||
var checkResFormat = """{"Score":打分(number),"Evaluation":评价以及扣分原因(string)""";//,"Data":优化后的分段(array)}""";
|
||||
var checkMessage =
|
||||
$"""
|
||||
请你担任一位专业的视频内容分析教研老师,擅长评估视频内容的结构和逻辑流暢度。
|
||||
|
|
@ -226,11 +226,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
return JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.CaptionsAI);
|
||||
var subject = taskInfo.Subject.ToString();
|
||||
var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length);
|
||||
var spanCount = 50;
|
||||
var spanCount = 75;
|
||||
var totalCount = captionsArr.Length / spanCount + 1;
|
||||
|
||||
await Parallel.ForAsync(0, totalCount,
|
||||
new ParallelOptions() { MaxDegreeOfParallelism = 20 },
|
||||
new ParallelOptions() { MaxDegreeOfParallelism =15 },
|
||||
async (s, c) =>
|
||||
{
|
||||
var cArr = captionsArr
|
||||
|
|
@ -242,17 +242,20 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
var nowCaptionStr = cStrArr.ToJson();
|
||||
var resFormat = """[string(修改结果)]""";
|
||||
var postMessages =
|
||||
$"这是一堂中国{subject}课堂的字幕,由结果是语音识别提供。" +
|
||||
$"字幕内容与{subject}学科{sections}章节相关。" +
|
||||
$"但是识别出来的字幕包含错误的关键字 例如应当是'数列'被识别为'树列'。" +
|
||||
$"所以需要你帮我修复其中的错别字,修复公式排版。" +
|
||||
$"请注意,只允许对字幕进行修改。" +
|
||||
$"输出内容只返回json格式({resFormat})" +
|
||||
$"字幕内容(JSON字符串)" +
|
||||
$"`{nowCaptionStr}`" +
|
||||
$"字幕结束。" +
|
||||
$"最后请确保输出字幕条数与输入字幕条数一致!!! 如果不一致则重新优化并且确保字幕条数一致!!!!";
|
||||
var resData = await chatClient.ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕", "deepseek-chat", 3000);
|
||||
$"角色设定:你是一位专业的中国{subject}学科专家,负责校对关于{sections}内容的课堂教学字幕。\n\n" +
|
||||
$"任务描述:\n" +
|
||||
$"请根据上下文逻辑,对输入的语音识别(STT)字幕进行深度优化。具体要求如下:\n" +
|
||||
$"1. 逻辑纠错:结合{subject}学科背景,利用前后文语义修正所有错误词汇。不仅要修正同音错别词(如:树列改为数列),还要修正因识别模糊导致的语义断裂或学科术语错误。\n" +
|
||||
$"2. 断句与标点:优化字幕的标点符号,并根据老师说话的语感和学科逻辑重新调整断句位置。确保每一条字幕在学术表达上自然、通顺,修复由于语音停顿造成的断句不当。\n" +
|
||||
$"3. 公式规范:将字幕中提到的数学或科学公式统一转化为规范的 LaTeX 格式。\n\n" +
|
||||
$"强制约束:\n" +
|
||||
$"- 数量对齐:输出的字幕条数(Array Length)必须与输入的字幕条数完全一致,严禁合并、拆分或删除任何条目。\n" +
|
||||
$"- 纯净返回:只允许返回 JSON 格式的字符串,严禁包含任何前言、后缀或解释性文字。\n" +
|
||||
$"- 数据格式:JSON 结构必须严格符合:{resFormat}\n\n" +
|
||||
$"待优化字幕内容:\n" +
|
||||
$"{nowCaptionStr}\n\n" +
|
||||
$"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。";
|
||||
var resData = await deepSeekClient.ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕");
|
||||
if (resData.Count() != cArr.Count())
|
||||
{
|
||||
resData = cStrArr.ToArray();
|
||||
|
|
@ -335,7 +338,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
/// <returns></returns>
|
||||
private async Task<SenseVoiceRes[]> AnalysisVideoQuestions(VideoTask taskInfo, List<KnowledgeInfo> knowledgeInfos)
|
||||
{
|
||||
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + $"=>{taskInfo.Id} 提取试题");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"=>{taskInfo.Id} 提取试题");
|
||||
if (taskInfo is null || string.IsNullOrEmpty(taskInfo.PPTKeyFrame))
|
||||
return null;
|
||||
var farmeArr = JsonSerializer.Deserialize<int[]>(taskInfo.PPTKeyFrame);
|
||||
|
|
@ -376,7 +379,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
break;
|
||||
|
||||
#if DEBUG
|
||||
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题的试题内容");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题的试题内容");
|
||||
#endif
|
||||
|
||||
//var knowArr=JsonSerializer.Serialize(knowInfoArr.Select(s => new { s.KnowPointId, s.KnowPoint }));
|
||||
|
|
@ -394,7 +397,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
$"输出内容只返回json格式为({resFormat})" +
|
||||
$"以下是试题内容" +
|
||||
$"`{sRes.Result.res.value}`";
|
||||
var resData = await chatClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
|
||||
var resData = await deepSeekClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
|
||||
//var resData = await chatClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
|
||||
if (resData is null || resData.Count() == 0)
|
||||
break;
|
||||
|
|
@ -435,7 +438,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题出现错误 {ex.Message}");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"=>{taskInfo.Id} 提取{knowInfoArr.StartTime}秒试题出现错误 {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -520,6 +523,9 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
var insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);//ChatGPT
|
||||
//校验结果质量
|
||||
var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id);
|
||||
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果 得分=>{checkRes.Score} ");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, checkRes.Evaluation);
|
||||
|
||||
if (checkRes != null && checkRes.Score >= 80)
|
||||
{
|
||||
|
|
@ -527,10 +533,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
|
||||
await videoKonwPointDB.InsertRangeAsync(insertData);
|
||||
break;
|
||||
}
|
||||
}else
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果不合格!即将重试 剩余次数{tryCount}");
|
||||
if (questionRes.Any(s => s.KeepTime < 30))
|
||||
{
|
||||
await redisManager.AddTaskLog(taskInfo.Id, DateTime.Now + "=>视频分段过短!! 重新进行AI分析");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, "=>视频分段过短!! 重新进行AI分析");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue