diff --git a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs index 07d5348..be07654 100644 --- a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs +++ b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs @@ -81,11 +81,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek // $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." + // $"输出格式 json字符串 对象格式{fileNameResFormat}"; var rCaptionArr = string.Join(',', captionsArr - .Where((s,i)=>i%3==0) - .Take((int)(captionsArr?.Length??0 / 2.2)) + .Where((s, i) => i % 3 == 0) + .Take((int)(captionsArr?.Length ?? 0 / 2.2)) .Select(s => s.Text)); - var fileNamePostMessages = + var fileNamePostMessages = "这是一堂课的部分授课字幕,请你基于字幕内容帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." + $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." + $"以下是包含时间的视频字幕文本。" + @@ -307,7 +307,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek var lastTime = 0; var endTime = 0; var timeSpan = 700; - while (endTime + 60< maxVideoTime) + while (endTime + 60 < maxVideoTime) { try { @@ -331,7 +331,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek Console.WriteLine(DateTime.Now + $"=>1.开始分析视频内容 {lastTime}~{endTime}"); questionRes.AddRange(await ChatAsync(task, postMessages, null)); - lastTime = (int)questionRes.Last().EndTime.Value - (lastTime==0?0: 30); + lastTime = (int)questionRes.Last().EndTime.Value - (lastTime == 0 ? 0 : 30); } catch (Exception ex) { @@ -438,7 +438,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek /// 获取内容对应的章节 /// /// - public async Task> GetVideoKnow(List questionRes,VideoTask taskInfo, string sections, int course_Id) + private async Task> GetVideoKnow(List questionRes, VideoTask taskInfo, string sections, int course_Id) { var know = await knowledgeInfoDB.GetFirstAsync(s => s.Course_Id == course_Id && s.Name == sections); @@ -496,14 +496,17 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek }); }).ToList(); - + } /// /// 获取内容对应的章节 /// /// - public async Task GetSections(VideoTask taskInfo,int course_Id) + private async Task GetSections(VideoTask taskInfo, int course_Id) { + if (!string.IsNullOrEmpty(taskInfo.Sections)) + return taskInfo.Sections; + var xkwKnows = await knowledgeInfoDB.AsQueryable() .Where(s => s.Course_Id == course_Id && (s.Depth == 3 @@ -527,15 +530,153 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek var task = taskInfo.Id.ToString(); var fileNameInfoRes = await ChatAsync (task, fileNamePostMessages, null); + taskInfo.Sections = fileNameInfoRes.授课章节; await videoTaskDB.AsUpdateable() .SetColumns(it => it.Sections == fileNameInfoRes.授课章节) .Where(it => it.Id == taskInfo.Id) - .ExecuteCommandAsync(); + .ExecuteCommandAsync(); await RedisExpand.Redis .HMSetAsync(RedisExpandKey.Task(task), "学科章节", fileNameInfoRes.授课章节); return fileNameInfoRes?.授课章节; } + + + + /// + /// 检查AI切片结果质量 + /// + /// + private async Task VerifySpanQuality(List questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, int course_Id) + { + //校验结果质量 + var thems = JsonSerializer.Serialize(questionRes.Adapt()); + var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}"""; + var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" + + $"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" + + $"分段的主题内容,知识点分配是否合理符合实际吗?" + + $"请给出你的打分(0-100,70分及格)以及打分原因。" + + $"这是我的分段 {thems}。" + + $"后续的内容是包含时间戳的视频字幕的固定格式文本。" + + $"字幕格式(说话人:开始秒:结束秒:内容|下一段字幕).以下是包含时间的视频字幕文本。字幕列表 {captions.Captions}。" + + $"最后输出格式为json({checkResFormat})"; + Console.WriteLine(DateTime.Now + "=>3.开始检查视频分段结果"); + return await ChatAsync(taskInfo.Id.ToString(), checkMessage, null); + } + + /// + /// 优化字幕 + /// + /// + private async Task> 优化字幕(VideoTask taskInfo, + SenseVoiceRes[] captionsArr, string sections) + { + var subject = taskInfo.Subject.ToString(); + var newCaptionsList = new List(captionsArr.Length); + var spanCount = 50; + var totalCount = captionsArr.Length / spanCount + 1; + + await Parallel.ForAsync(0, totalCount, + new ParallelOptions() { MaxDegreeOfParallelism = 4 }, + async (s, c) => + { + while (true) + { + var cArr = captionsArr + .Skip(spanCount * s) + .Take(spanCount); + if (cArr.Count() == 0) + return; + var nowCaptionStr = string.Join('|', cArr.Select(s => /*s.Start + ":" +*/ s.Text)); + var resFormat = """[string(修改结果)]"""; + var postMessages = + $"这是一堂中国课的字幕由结果是语音识别提供。" + + $"字幕内容与{subject}学科{sections}章节相关。" + + $"需要你帮我修复其中的错别字,修复公式。" + + $"请注意 只允许对字幕进行修改不允许删除字幕的字或者添加字。" + + $"请确保输出字幕条数与输入字幕条数一致!!!" + + $"输出内容只返回json格式({resFormat})" + + $"字幕内容(使用|符号分割)" + + $"`{nowCaptionStr}`" + + $"字幕结束。 "; + Console.WriteLine(DateTime.Now + $"=>字幕优化 分段{s}/{totalCount}开始..."); + var resData = await ChatAsync(taskInfo.Id.ToString(), postMessages, null, "deepseek-chat"); + if (resData.Count() != cArr.Count()) + { + Console.WriteLine(DateTime.Now + $"=>字幕优化 分段{s}/{totalCount} AI结果数量不匹配,重试"); + continue; + } + newCaptionsList.AddRange(resData.Select((text, i) => new SenseVoiceRes() + { + Start = captionsArr[spanCount * s + i].Start, + End = captionsArr[spanCount * s + i].End, + Text = text, + })); + Console.WriteLine(DateTime.Now + $"=>字幕优化 分段{s}/{totalCount}完成√"); + return; + } + }); + Console.WriteLine(DateTime.Now + $"=>字幕优化执行完成"); + var jsonData = JsonSerializer.Serialize(newCaptionsList.OrderBy(s=>s.Start)); + await videoTaskDB.AsUpdateable() + .SetColumns(it => it.CaptionsAI == jsonData) + .Where(it => it.Id == taskInfo.Id) + .ExecuteCommandAsync(); + return newCaptionsList; + } + /// + /// 视频AI分析字幕 + /// + /// + private async Task Analytics(List questionRes, VideoTask taskInfo, + SenseVoiceRes[] captionsArr, string sections) + { + var lastTime = 0; + var endTime = 0; + var maxVideoTime = captionsArr?.LastOrDefault()?.End ?? 0; + var subject = taskInfo.Subject.ToString(); + var timeSpan = (int)(maxVideoTime * 0.5); + while (endTime + 60 < maxVideoTime) + { + try + { + endTime = lastTime + timeSpan; + var nowCaptionStr = string.Join('|', captionsArr + .Where(s => s.Text != "。") + .Where((s, i) => s.Start > lastTime && s.End < endTime) + .Select(s => s.Start + ":" + s.Text)); + var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) + ? string.Empty + : $"视频授课内容发生了变化的时间节点{taskInfo.PPTKeyFrame},授课阶段应当在附近时间发生变化。"; + var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]"""; + var postMessages = + $"请通过视频字幕内容分析出视频中{subject}课堂的授课阶段。" + + $"课堂内容与{sections}章节相关。" + + $"{keyFrameArr}" + + $"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。" + + $"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" + + $"通过生成的内容总结分析出对应的授课阶段主题。 " + + $"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。" + + $"最后请检查每个授课阶段的时长,不允许出现超出800秒或者低于50秒的授课阶段。" + + $"输出内容只返回json格式({resFormat})" + + $"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" + + $"字幕列表 {nowCaptionStr} 字幕结束!"; + + Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id.ToString()}.开始分析视频内容 {lastTime}~{endTime}"); + questionRes.AddRange(await ChatAsync(taskInfo.Id.ToString(), postMessages, null)); + lastTime = (int)questionRes.Last().EndTime.Value - (lastTime == 0 ? 0 : 30); + } + catch (Exception ex) + { + Console.WriteLine(DateTime.Now + $"=>分析视频内容失败 {lastTime}~{endTime}"); + Console.WriteLine(DateTime.Now + ex.Message); + Console.WriteLine(DateTime.Now + ex.StackTrace); + endTime = lastTime - timeSpan; + + } + } + } + /// /// 获取知识点 /// @@ -563,6 +704,8 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek var captionsArr = JsonSerializer.Deserialize(taskInfo.Captions); //处理视频授课章节 var sections = await GetSections(taskInfo, Course_Id); + //AI优化字幕 + await 优化字幕(taskInfo, captionsArr, sections); //合并字幕 var captions = ExpandFunction.GetSpeakerCaptions(captionsArr); var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0; @@ -571,64 +714,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek while (true) { questionRes = new List(); - var lastTime = 0; - var endTime = 0; - var timeSpan =(int)(maxVideoTime * 0.5); - while (endTime + 60 < maxVideoTime) - { - try - { - endTime = lastTime + timeSpan; - var nowCaptionStr = string.Join('|', captionsArr - .Where(s => s.Text != "。") - .Where((s, i) => s.Start > lastTime && s.End < endTime) - .Select(s => s.Start + ":" + s.Text)); - var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) - ?string.Empty - : $"视频授课内容发生了变化的时间节点{taskInfo.PPTKeyFrame},授课阶段应当在附近时间发生变化。" ; - var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]"""; - var postMessages = - $"请通过视频字幕内容分析出视频中{subject}课堂的授课阶段。" + - $"课堂内容与{sections}章节相关。" + - $"{keyFrameArr}" + - $"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。" + - $"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" + - $"通过生成的内容总结分析出对应的授课阶段主题。 " + - $"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。" + - $"最后请检查每个授课阶段的时长,不允许出现超出800秒或者低于50秒的授课阶段。" + - $"输出内容只返回json格式({resFormat})" + - $"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" + - $"字幕列表 {nowCaptionStr} 字幕结束!"; + //视频字幕分析 + await Analytics(questionRes, taskInfo, captionsArr, sections); - Console.WriteLine(DateTime.Now + $"=>1.开始分析视频内容 {lastTime}~{endTime}"); - questionRes.AddRange(await ChatAsync(task, postMessages, null)); - lastTime = (int)questionRes.Last().EndTime.Value - (lastTime == 0 ? 0 : 30); - } - catch (Exception ex) - { - Console.WriteLine(DateTime.Now + $"=>分析视频内容失败 {lastTime}~{endTime}"); - Console.WriteLine(DateTime.Now + ex.Message); - Console.WriteLine(DateTime.Now + ex.StackTrace); - endTime = lastTime - timeSpan; - - } - } if (questionRes.Count == 0) continue; //处理分段 知识点 var insertData = await GetVideoKnow(questionRes, taskInfo, sections, Course_Id); //校验结果质量 - var thems = JsonSerializer.Serialize(questionRes.Adapt()); - var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}"""; - var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" + - $"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" + - $"分段的主题内容,知识点分配是否合理符合实际吗?" + - $"请给出你的打分(0-100,70分及格)以及打分原因。" + - $"这是我的分段 {thems}。" + - $"后续的内容是包含时间戳的视频字幕的固定格式文本。" + - $"字幕格式(说话人:开始秒:结束秒:内容|下一段字幕).以下是包含时间的视频字幕文本。字幕列表 {captions.Captions}。" + - $"最后输出格式为json({checkResFormat})"; - Console.WriteLine(DateTime.Now + "=>3.开始检查视频分段结果"); - var checkRes = await ChatAsync(task, checkMessage, null); + var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id); if (checkRes != null && checkRes.Score >= 80) { @@ -651,9 +744,6 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek } - - - await RedisExpand.Redis .HMSetAsync(RedisExpandKey.Task(task), "VideoKnows", questionRes); @@ -677,12 +767,12 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek { taskId = task, model = model, - stream = model == "deepseek-reasoner", + stream = true, temperature = 0.2f, messages = messageArr }; var tryCount = 10; - while (--tryCount >0) + while (--tryCount > 0) { try { @@ -726,7 +816,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek Console.WriteLine(ex.Message); } } - throw new Exception(DateTime.Now+ "=>ChatGPT请求失败次数过多!!!"); + throw new Exception(DateTime.Now + "=>ChatGPT请求失败次数过多!!!"); } } } diff --git a/VideoAnalysisCore/Model/VideoTask.cs b/VideoAnalysisCore/Model/VideoTask.cs index 42e5d07..afe35c4 100644 --- a/VideoAnalysisCore/Model/VideoTask.cs +++ b/VideoAnalysisCore/Model/VideoTask.cs @@ -84,6 +84,11 @@ namespace VideoAnalysisCore.Model [SugarColumn(ColumnName = "Captions", ColumnDataType = "longtext", IsNullable = true)] public string Captions { get; set; } = "[]"; /// + /// 字幕缓存[AI优化] + /// + [SugarColumn(ColumnName = "CaptionsAI", ColumnDataType = "longtext", IsNullable = true)] + public string CaptionsAI { get; set; } = "[]"; + /// /// 说话人日志解析缓存 /// [SugarColumn(ColumnName = "Speaker", ColumnDataType = "longtext", IsNullable = true)]