diff --git a/VideoAnalysis/Learn.VideoAnalysis.csproj b/VideoAnalysis/Learn.VideoAnalysis.csproj index 39f817c..4c77aa5 100644 --- a/VideoAnalysis/Learn.VideoAnalysis.csproj +++ b/VideoAnalysis/Learn.VideoAnalysis.csproj @@ -44,6 +44,9 @@ + + Always + true diff --git a/VideoAnalysis/Program.cs b/VideoAnalysis/Program.cs index 34a7737..9d61e40 100644 --- a/VideoAnalysis/Program.cs +++ b/VideoAnalysis/Program.cs @@ -109,6 +109,7 @@ namespace Learn.VideoAnalysis AppCommon.Services = app.Services; + // Configure the HTTP request pipeline. if (app.Environment.IsDevelopment()) { diff --git a/VideoAnalysis/appsettings.json b/VideoAnalysis/appsettings.json index 60d4698..b48376c 100644 --- a/VideoAnalysis/appsettings.json +++ b/VideoAnalysis/appsettings.json @@ -22,6 +22,11 @@ "Token": "" } }, + "SimpLetex": { + "Host": "https://server.simpletex.cn/api/", + "AppSecret": "05ZbPfCFZgTmfd4uIqHHc9pHgYR2V8bk", + "AppId": "GH2OXwuxSZEH5W28H61bdSzD" + }, "Redis": { "ConnectionString": "127.0.0.1:6379,password=Woshiren123,defaultDatabase=10" }, @@ -48,11 +53,6 @@ "aliyun": { "Host": "https://dashscope.aliyuncs.com/compatible-mode/", "ApiKey": "sk-1742c2bf7b9846ae835de598dc6c427b" - }, - "SimpLetex": { - "Host": "https://api.deepseek.com/chat/completions", - "AppSecret": "05ZbPfCFZgTmfd4uIqHHc9pHgYR2V8bk", - "AppId": "GH2OXwuxSZEH5W28H61bdSzD" } }, "DB": { diff --git a/VideoAnalysisCore/AICore/FFMPGE/FFMPGEHandle.cs b/VideoAnalysisCore/AICore/FFMPGE/FFMPGEHandle.cs index 2b7f09a..3768cb3 100644 --- a/VideoAnalysisCore/AICore/FFMPGE/FFMPGEHandle.cs +++ b/VideoAnalysisCore/AICore/FFMPGE/FFMPGEHandle.cs @@ -59,7 +59,7 @@ namespace VideoAnalysisCore.AICore.FFMPGE FileSystem.DeleteFile(jpgFile, UIOption.OnlyErrorDialogs, RecycleOption.DeletePermanently); RedisExpand.SetTaskProgress(task, "Frame=>20%"); - await ffmpeg.ExecuteAsync($"-i {filePath} -vf \"fps=1/{intervalSec},scale=960:540\" {localPath}/frame_%03d.jpg", cToken); + await ffmpeg.ExecuteAsync($"-i {filePath} -vf 
\"fps=1/{intervalSec},scale=960:540\" {localPath}/{ExpandFunction.FrameName}%03d.jpg", cToken); //视频关键帧分析 var frameFiles = Directory.GetFiles(localPath, "*.jpg") diff --git a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs index a5f5ca7..3910804 100644 --- a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs +++ b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs @@ -15,6 +15,7 @@ using Mapster; using System.Linq; using System.Security.Cryptography; using static System.Collections.Specialized.BitVector32; +using FFmpeg.NET.Services; namespace VideoAnalysisCore.AICore.GPT.DeepSeek { @@ -27,408 +28,23 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek private readonly Repository criteriaDB; private readonly Repository videoTaskDB; private readonly Repository videoKonwPointDB; + private readonly Repository videoQuestionDB; private readonly Repository knowledgeInfoDB; + private readonly SimpLetexClient simpLetexClient; /// /// 初始化 /// /// /// - public DeepSeek_GPT(DeepSeekGPTClient moonshotClient, Repository criteria, Repository videoTaskDB, Repository knowledgeInfoDB, Repository videoKonwPointDB) + public DeepSeek_GPT(DeepSeekGPTClient moonshotClient, Repository criteria, Repository videoTaskDB, Repository knowledgeInfoDB, Repository videoKonwPointDB, SimpLetexClient simpLetexClient, Repository videoQuestionDB) { chatClient = moonshotClient; criteriaDB = criteria; this.videoTaskDB = videoTaskDB; this.knowledgeInfoDB = knowledgeInfoDB; this.videoKonwPointDB = videoKonwPointDB; - } - - - /// - /// 获取知识点 - /// - /// 任务id - /// - public async Task GetKnow1(string task) - { - var taskId = long.Parse(task); - var taskInfo = await videoTaskDB.AsQueryable() - .Where(s => s.Id == taskId) - .FirstAsync(); - var subject = "数学"; - var Course_Id = 27; - switch (taskInfo.Type)//处理不同任务类型的知识点树 - { - case TaskTypeEnum.蓝鲸智库_中职视频分段: - Course_Id = 51; - break; - case TaskTypeEnum.蓝鲸智库_视频分段: - default: - Course_Id = 27; 
- break; - } - var xkwKnows = await knowledgeInfoDB.AsQueryable() - .Where(s => s.Course_Id == Course_Id - && (s.Depth == 3 - || s.Depth == 2)) - .Select(s => s.Name).ToArrayAsync(); - string title = taskInfo.MediaName; - var speakerArr = JsonSerializer.Deserialize(taskInfo.Speaker); - var captionsArr = JsonSerializer.Deserialize(taskInfo.Captions); - - var fileNameResFormat = "{授课章节: string|null}"; - var rCaptionArr = string.Join(',', captionsArr - .Where((s, i) => i % 3 == 0) - .Take((int)(captionsArr?.Length ?? 0 / 2.2)) - .Select(s => s.Text)); - - var fileNamePostMessages = - "这是一堂课的部分授课字幕,请你基于字幕内容帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." + - $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." + - $"以下是包含时间的视频字幕文本。" + - $"字幕列表 {rCaptionArr}。" + - $"输出格式 json字符串 对象格式{fileNameResFormat}"; - - var fileNameInfoRes = await ChatAsync - (task, fileNamePostMessages, null); - - - var captions = ExpandFunction.GetSpeakerCaptions(captionsArr, speakerArr); - var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0; - var criteriaBuilder = new StringBuilder(); - - var know = await knowledgeInfoDB.GetFirstAsync(s => s.Course_Id == Course_Id && s.Name == fileNameInfoRes.授课章节); - if (know is null) - throw new Exception("未能找到对应知识点=>" + fileNameInfoRes.授课章节); - //提升到父级 - var kInfo = await knowledgeInfoDB.GetByIdAsync(know.Parent_Id); - var knowledgeInfos = await knowledgeInfoDB.AsQueryable().ToChildListAsync(s => s.Parent_Id, kInfo.Parent_Id == 0 ? 
kInfo.Id : kInfo.Parent_Id); - var knows = string.Join(',', knowledgeInfos.Select(s => s.Id + "|" + s.Name)); - var knowDic = knowledgeInfos - .OrderBy(s => s.Id) - .GroupBy(s => s.Name) - .ToDictionary(s => s.First().Name, s => s.First().Id); - VideoKnowRes[] questionRes; - while (true) - { - - var resFormat = """[{"StartTime":开始秒(number),"Theme":主题(string),"Content":内容总结(string)}]"""; - var postMessages = - $"你的任务是分析视频字幕内容并提取出中国高考考试试题方法点,然后分析出<知识块>来帮助学生快速了解视频字幕的内容。" + - $"通过阅读并理解字幕内容.然后识别出{subject}学科中属于{fileNameInfoRes.授课章节}章节相关的时间段。" + - $"关联合并知识内容相似的知识点来合并为<知识块>。" + - $"分配空余未使用的时间段到内容相近的<知识块>时间区间来获取更加详细的上下文,但是请避免<知识块>之间时间重合。" + - $"字幕格式(开始秒:结束秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" + - $"字幕列表 {captions.Captions}。" + - $"最后请检查某些<知识块>之间的过渡是否自然,如果<知识块>时长超过500秒则考虑拆封为两个更加贴切的<知识块>。" + - $"请检查<知识块>时长小于30秒则考虑合并<知识块>到相邻的<知识块>)。" + - $"输出内容只返回json格式({resFormat})"; - - Console.WriteLine(DateTime.Now + "=>1.开始分析视频内容"); - - questionRes = await ChatAsync(task, postMessages, null); - - if (questionRes.Length <= 3) - throw new Exception("视频分段数量过低 =>" + questionRes.Length); - questionRes = questionRes.OrderBy(s => s.StartTime).ToArray(); - - var thems = JsonSerializer.Serialize(questionRes.Adapt());// string.Join(',', questionRes.Select(s => s.StartTime + "->" + s.Theme)); - var checkResFormat1 = """[{"StartTime":开始秒(number),"KnowPoint":知识点名称(string),"KnowPointId":知识点Id(string)}]"""; - var knowMessages = - $"我针对{subject}课堂授课视频分析出了视频的授课阶段片段。" + - $"现在需要你通过每个片段的内容总结来分配正确的知识点(单个片段允许多个知识点用逗号','分割)。" + - $"这是我的分段 {thems}。" + - $"课堂内容与{fileNameInfoRes.授课章节}章节相关" + - $"最后请确保分配的知识点是用户提供的,并且一定正确合理!" 
+ - $"输出内容只返回json格式({checkResFormat1})" + - $" 格式 (方法点Id|方法点名称) " + - $"提供的知识点名称({knows})。"; - Console.WriteLine(DateTime.Now + "=>2.开始分析视频内容知识点"); - var konwRes = await ChatAsync(task, knowMessages, null); - - for (int i = 0; i < konwRes.Count(); i++) - questionRes[i].KnowPoint = konwRes[i].KnowPoint; - for (int i = 0; i < questionRes.Length; i++) - { - var item = questionRes[i]; - if (i == questionRes.Length - 1) - item.EndTime = maxVideoTime; - else - item.EndTime = (int)(questionRes[i + 1]?.StartTime ?? 0) - 1; - } - thems = JsonSerializer.Serialize(questionRes.Adapt()); - var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}"""; - var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" + - $"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" + - $"分段的主题内容,知识点分配是否合理符合实际吗?" + - $"请给出你的打分(0-100,70分及格)以及打分原因。" + - $"这是我的分段 {thems}。" + - $"后续的内容是包含时间戳的视频字幕的固定格式文本。" + - $"字幕格式(说话人:开始秒:结束秒:内容|下一段字幕).以下是包含时间的视频字幕文本。字幕列表 {captions.Captions}。" + - $"最后输出格式为json({checkResFormat})"; - - Console.WriteLine(DateTime.Now + "=>3.开始检查视频分段结果"); - var checkRes = await ChatAsync(task, checkMessage, null); - if (checkRes != null && checkRes.Score >= 80) - { - break; - } - else - { - Console.WriteLine(DateTime.Now + $"=>{task} 得分过低/分段长度不匹配 得分{checkRes?.Score} "); - Console.WriteLine(checkRes.Evaluation); - Console.WriteLine(); - } - if (questionRes.Any(s => s.KeepTime < 30)) - { - Console.WriteLine(DateTime.Now + "=>视频分段过短!! 
重新进行AI分析"); - continue; - } - - } - - //todo 未包含的知识点片段 如何处理 - var insertData = questionRes - .Where(s => !string.IsNullOrEmpty(s.KnowPoint)) - .SelectMany( - s => - { - var ks = s.KnowPoint.Split(",").Distinct(); - return ks.Where(x => knowDic.ContainsKey(x)) - .Select(x => new VideoKonwPoint() - { - Content = s.Content, - Theme = s.Theme, - StartTime = s.StartTime, - EndTime = s.EndTime, - KnowPoint = x, - KnowPointId = knowDic[x].ToString(), - TagId = taskInfo.TagId, - VideoTaskId = taskInfo.Id, - }); - }).ToList(); - await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskId); - await videoKonwPointDB.InsertRangeAsync(insertData); - - - - - await RedisExpand.Redis - .HMSetAsync(RedisExpandKey.Task(task), "VideoKnows", questionRes); - - var gptRes = new TaskRes(captions); - await RedisExpand.Redis - .HMSetAsync(RedisExpandKey.Task(task), "ChatAnalysis", gptRes); - RedisExpand.InsertChannel(RedisChannelEnum.EndTask, task); - return gptRes; - } - - - - - /// - /// 获取知识点 - /// - /// 任务id - /// - public async Task GetKnow_v1(string task) - { - var taskId = long.Parse(task); - var taskInfo = await videoTaskDB.AsQueryable() - .Where(s => s.Id == taskId) - .FirstAsync(); - var subject = "数学"; - var Course_Id = 27; - switch (taskInfo.Type)//处理不同任务类型的知识点树 - { - case TaskTypeEnum.蓝鲸智库_中职视频分段: - Course_Id = 51; - break; - case TaskTypeEnum.蓝鲸智库_视频分段: - default: - Course_Id = 27; - break; - } - var xkwKnows = await knowledgeInfoDB.AsQueryable() - .Where(s => s.Course_Id == Course_Id - && (s.Depth == 3 - || s.Depth == 2)) - .Select(s => s.Name).ToArrayAsync(); - string title = taskInfo.MediaName; - var speakerArr = JsonSerializer.Deserialize(taskInfo.Speaker); - var captionsArr = JsonSerializer.Deserialize(taskInfo.Captions); - - var fileNameResFormat = "{授课章节: string|null}"; - //var fileNamePostMessages = title + - // " 这是一堂课的标题,请你基于标题帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." + - // $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." 
+ - // $"输出格式 json字符串 对象格式{fileNameResFormat}"; - var rCaptionArr = string.Join(',', captionsArr - .Where((s, i) => i % 3 == 0) - .Take((int)(captionsArr?.Length ?? 0 / 2.2)) - .Select(s => s.Text)); - - var fileNamePostMessages = - "这是一堂课的部分授课字幕,请你基于字幕内容帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." + - $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." + - $"以下是包含时间的视频字幕文本。" + - $"字幕列表 {rCaptionArr}。" + - $"输出格式 json字符串 对象格式{fileNameResFormat}"; - - var fileNameInfoRes = await ChatAsync - (task, fileNamePostMessages, null); -#if DEBUG - fileNameInfoRes = new FileNameInfo() { 授课章节 = "一元二次不等式" }; -#endif - var captions = ExpandFunction.GetSpeakerCaptions(captionsArr, speakerArr); - var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0; - var criteriaBuilder = new StringBuilder(); - var know = await knowledgeInfoDB.GetFirstAsync(s => s.Course_Id == Course_Id && s.Name == fileNameInfoRes.授课章节); - if (know is null) - throw new Exception("未能找到对应知识点=>" + fileNameInfoRes.授课章节); - - await RedisExpand.Redis - .HMSetAsync(RedisExpandKey.Task(task), "学科章节", fileNameInfoRes.授课章节); - //提升到父级 - var kInfo = await knowledgeInfoDB.GetByIdAsync(know.Parent_Id); - var knowledgeInfos = await knowledgeInfoDB.AsQueryable().ToChildListAsync(s => s.Parent_Id, kInfo.Parent_Id == 0 ? 
kInfo.Id : kInfo.Parent_Id); - var knows = string.Join(',', knowledgeInfos.Select(s => s.Id + "|" + s.Name)); - var knowDic = knowledgeInfos - .OrderBy(s => s.Id) - .GroupBy(s => s.Name) - .ToDictionary(s => s.First().Name, s => s.First().Id); - var questionRes = new List(); - while (true) - { - questionRes = new List(); - var lastTime = 0; - var endTime = 0; - var timeSpan = 700; - while (endTime + 60 < maxVideoTime) - { - try - { - endTime = lastTime + timeSpan; - var nowCaptionStr = string.Join('|', captionsArr - .Where(s => s.Text != "。") - .Where((s, i) => s.Start > lastTime && s.End < endTime) - .Select(s => s.Start + ":" + s.Text)); - var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]"""; - var postMessages = - $"你的任务是分析出中国{subject}课堂标准流程的授课阶段智能分段" + - $"课堂内容与{fileNameInfoRes.授课章节}章节相关" + - $"课堂标准流程包含以下7个阶段:课程引入/新知讲解/例题精讲/课堂练习/互动讨论/课程总结/作业布置。" + - $"通过时间段的主要讲解内容分析出对应的时间段内容总结。" + - $"通过生成的内容总结分析出对应的时间段主题。 " + - $"本次只分析一个阶段(优先按照阶段顺序 例如 课程引入优先)。 " + - $"最后请检查某些时间段的时常 如果超出800秒或者低于30秒则不符合条件!" + - $"输出内容只返回json格式({resFormat})" + - $"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的部分视频字幕文本。" + - $"字幕列表 {nowCaptionStr} 字幕结束!"; - - Console.WriteLine(DateTime.Now + $"=>1.开始分析视频内容 {lastTime}~{endTime}"); - questionRes.AddRange(await ChatAsync(task, postMessages, null)); - lastTime = (int)questionRes.Last().EndTime.Value - (lastTime == 0 ? 
0 : 30); - } - catch (Exception ex) - { - Console.WriteLine(DateTime.Now + $"=>分析视频内容失败 {lastTime}~{endTime}"); - Console.WriteLine(DateTime.Now + ex.Message); - Console.WriteLine(DateTime.Now + ex.StackTrace); - endTime = lastTime - timeSpan; - - } - } - questionRes = questionRes.OrderBy(s => s.StartTime).ToList(); - - var thems = JsonSerializer.Serialize(questionRes.Adapt());// string.Join(',', questionRes.Select(s => s.StartTime + "->" + s.Theme)); - var checkResFormat1 = """[{"StartTime":开始秒(number),"KnowPoint":知识点名称(string),"KnowPointId":知识点Id(string)}]"""; - var knowMessages = - $"我针对{subject}课堂授课视频分析出了视频的授课阶段片段。" + - $"现在需要你通过每个片段的内容总结来分配正确的知识点(单个片段允许多个知识点用逗号','分割)。" + - $"这是我的分段 {thems}。" + - $"课堂内容与{fileNameInfoRes.授课章节}章节相关" + - $"最后请确保分配的知识点是用户提供的,并且一定正确合理!" + - $"输出内容只返回json格式({checkResFormat1})" + - $" 格式 (方法点Id|方法点名称) " + - $"提供的知识点名称({knows})。"; - Console.WriteLine(DateTime.Now + "=>2.开始分析视频内容知识点"); - var konwRes = await ChatAsync(task, knowMessages, null); - - for (int i = 0; i < konwRes.Count(); i++) - questionRes[i].KnowPoint = konwRes[i].KnowPoint; - //for (int i = 0; i < questionRes.Length; i++) - //{ - // var item = questionRes[i]; - // if (i == questionRes.Length - 1) - // item.EndTime = maxVideoTime; - // else - // item.EndTime = (int)(questionRes[i + 1]?.StartTime ?? 0) - 1; - //} - thems = JsonSerializer.Serialize(questionRes.Adapt()); - var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}"""; - var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" + - $"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" + - $"分段的主题内容,知识点分配是否合理符合实际吗?" 
+ - $"请给出你的打分(0-100,70分及格)以及打分原因。" + - $"这是我的分段 {thems}。" + - $"后续的内容是包含时间戳的视频字幕的固定格式文本。" + - $"字幕格式(说话人:开始秒:结束秒:内容|下一段字幕).以下是包含时间的视频字幕文本。字幕列表 {captions.Captions}。" + - $"最后输出格式为json({checkResFormat})"; - - Console.WriteLine(DateTime.Now + "=>3.开始检查视频分段结果"); - var checkRes = await ChatAsync(task, checkMessage, null); - if (checkRes != null && checkRes.Score >= 80) - { - break; - } - else - { - Console.WriteLine(DateTime.Now + $"=>{task} 得分过低/分段长度不匹配 得分{checkRes?.Score} "); - Console.WriteLine(checkRes.Evaluation); - Console.WriteLine(); - } - if (questionRes.Any(s => s.KeepTime < 30)) - { - Console.WriteLine(DateTime.Now + "=>视频分段过短!! 重新进行AI分析"); - continue; - } - - } - - //todo 未包含的知识点片段 如何处理 - var insertData = questionRes - .Where(s => !string.IsNullOrEmpty(s.KnowPoint)) - .SelectMany( - s => - { - var ks = s.KnowPoint.Split(",").Distinct(); - return ks.Where(x => knowDic.ContainsKey(x)) - .Select(x => new VideoKonwPoint() - { - Content = s.Content, - Theme = s.Theme, - StartTime = s.StartTime, - EndTime = s.EndTime, - KnowPoint = x, - KnowPointId = knowDic[x].ToString(), - TagId = taskInfo.TagId, - VideoTaskId = taskInfo.Id, - }); - }).ToList(); - await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskId); - await videoKonwPointDB.InsertRangeAsync(insertData); - - - - - await RedisExpand.Redis - .HMSetAsync(RedisExpandKey.Task(task), "VideoKnows", questionRes); - - var gptRes = new TaskRes(captions); - await RedisExpand.Redis - .HMSetAsync(RedisExpandKey.Task(task), "ChatAnalysis", gptRes); - RedisExpand.InsertChannel(RedisChannelEnum.EndTask, task); - return gptRes; + this.simpLetexClient = simpLetexClient; + this.videoQuestionDB = videoQuestionDB; } /// /// 获取内容对应的章节 @@ -490,12 +106,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek KnowPointId = knowDic[x].ToString(), TagId = taskInfo.TagId, VideoTaskId = taskInfo.Id, - Stage =s?.Stage?.ToEnum() + Stage = s?.Stage?.ToEnum() }); }).ToList(); } + /// /// 获取内容对应的章节 /// @@ -539,8 +156,6 @@ namespace 
VideoAnalysisCore.AICore.GPT.DeepSeek return fileNameInfoRes?.授课章节; } - - /// /// 检查AI切片结果质量 /// @@ -566,7 +181,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek /// 优化字幕 /// /// - private async Task 优化字幕(VideoTask taskInfo, + private async Task OptimizeSubtitles(VideoTask taskInfo, SenseVoiceRes[] captionsArr, string sections) { if (!string.IsNullOrEmpty(taskInfo.CaptionsAI)) @@ -577,7 +192,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek var totalCount = captionsArr.Length / spanCount + 1; await Parallel.ForAsync(0, totalCount, - new ParallelOptions() { MaxDegreeOfParallelism =10 }, + new ParallelOptions() { MaxDegreeOfParallelism = 10 }, async (s, c) => { while (true) @@ -625,6 +240,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek .ExecuteCommandAsync(); return res; } + /// /// 视频AI分析字幕 /// @@ -702,11 +318,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek break; } + //AnalysisVideoQuestions(taskInfo,) + + var captionsArr = JsonSerializer.Deserialize(taskInfo.Captions); //处理视频授课章节 var sections = await GetSections(taskInfo, Course_Id); //AI优化字幕 - captionsArr = await 优化字幕(taskInfo, captionsArr, sections); + captionsArr = await OptimizeSubtitles(taskInfo, captionsArr, sections); //合并字幕 var captions = ExpandFunction.GetSpeakerCaptions(captionsArr); var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 
0; @@ -752,7 +371,6 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek return null; } - public async Task ChatAsync(string task, string postMessages, string postMessages1, string model = "deepseek-reasoner") { Message[] messageArr = [ @@ -815,5 +433,67 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek } throw new Exception(DateTime.Now + "=>ChatGPT请求失败次数过多!!!"); } + + + + /// <summary> + /// Runs OCR on each PPT key frame and asks the chat model to extract exam questions linked to the knowledge points of the video segment that contains the frame, then saves them through videoQuestionDB. + /// </summary> + /// <returns>Always null; the extracted questions are persisted, not returned.</returns> + private async Task AnalysisVideoQuestions(VideoTask taskInfo) + { + + if (taskInfo is null || string.IsNullOrEmpty(taskInfo.PPTKeyFrame)) + return null; + var farmeArr = JsonSerializer.Deserialize(taskInfo.PPTKeyFrame); + var videoKnowArr = await videoKonwPointDB.GetListAsync(s => s.VideoTaskId == taskInfo.Id); + var insertData =new List(); + foreach (var item in farmeArr) + { + var knowInfoArr = videoKnowArr + .Where(s => item >= s.StartTime && item <= s.EndTime) + .ToArray(); + if (knowInfoArr is null || knowInfoArr.Count() ==0) + continue; // no segment covers this frame: skip it and keep the questions gathered so far + for (var attempt = 0; attempt < 3; attempt++) // retry a failed OCR/chat call at most 3 times + { + try + { + var sRes = await simpLetexClient + .ProcessImageAsync(new SimpleTexOcrRequest(taskInfo.FramePath(item))); + if (!sRes.Success) + continue; + var knowArr=string.Join(',', knowInfoArr.Select(s => s.KnowPoint + "|"+s.KnowPointId)); + var resFormat = """[{"TopicStem":string(试题题干),"Question":string(问题),"KnowPointId":(string)知识点ID(多个使用逗号拼接)}]"""; + var postMessages = + $"提供一段内容是md格式的试题内容字符串。" + + $"请提取出其中的试题内容。并且为他们关联上在我限定范围内的知识点。" + + $"知识点范围[{knowArr}]。" + + $"排除不是试题内容的文字,优化公式排版并且去除题号。" + + $"如果存在多道大题,请帮忙拆分开!" 
+ + $"输出内容只返回json格式为({resFormat})" + + $"以下是试题内容" + + $"`{sRes.Result.res.info.markdown}`"; + var resData = await ChatAsync(taskInfo.Id.ToString(), postMessages, null, "deepseek-chat"); + foreach (var q in resData) + { + q.VideoTaskId = taskInfo.Id; + q.VideoKonwPoint = knowInfoArr.First().Id; + } + insertData.AddRange(resData); + break; // this frame is done, stop retrying + } + catch (Exception ex) + { + Console.WriteLine(DateTime.Now + $"=>AnalysisVideoQuestions frame {item} failed: " + ex.Message); + } + } + } + + + await videoQuestionDB.InsertRangeAsync(insertData); + + return null; + } } } diff --git a/VideoAnalysisCore/Common/AppCommon.cs b/VideoAnalysisCore/Common/AppCommon.cs index 994cd94..334d1cb 100644 --- a/VideoAnalysisCore/Common/AppCommon.cs +++ b/VideoAnalysisCore/Common/AppCommon.cs @@ -14,6 +14,7 @@ using System.Text.RegularExpressions; using System.Threading.Tasks; using UserCenter.Model.Interface; using VideoAnalysisCore.AICore.SherpaOnnx; +using VideoAnalysisCore.Model; using VideoAnalysisCore.Model.Dto; using VideoAnalysisCore.Model.Enum; using VideoAnalysisCore.Model.Interface; @@ -85,6 +86,22 @@ namespace VideoAnalysisCore.Common { static Dictionary FormulaData; static string FormulaDataKey; + /// <summary> + /// File-name prefix of the frame images extracted from a video. + /// </summary> + public static string FrameName = "frame_"; + + /// <summary> + /// Builds the path of the extracted frame image for a position on the video timeline. + /// </summary> + /// <param name="task">Video task; its Id is passed to LocalPath() to get the frame directory.</param> + /// <param name="fTime">Timeline position of the frame; divided by 5 to get the frame number. NOTE(review): assumes one frame every 5 seconds and the same index base as the extractor - confirm.</param> + /// <returns>Path of the .jpg file; the number is zero-padded to three digits to match the {FrameName}%03d.jpg pattern the frames are written with.</returns> + public static string FramePath(this VideoTask task, int fTime) + { + return Path.Combine(task.Id.ToString().LocalPath(), $"{FrameName}{fTime / 5:D3}.jpg"); + } + /// /// 识别字符串中的json字符串 /// diff --git a/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs b/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs index 01a2dfb..1046c44 100644 --- a/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs +++ b/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Linq; using System.Net.Http; using System.Net.Http.Headers; +using System.Net.Http.Json; using System.Net.Sockets; using System.Security.Cryptography; using System.Text; @@ -19,11 +20,36 @@ using VideoAnalysisCore.Job; namespace VideoAnalysisCore.Common { + 
+ + public class SimpleTexOcrResponseData + { + public bool status { get; set; } + public SimpleTexOcrResponseDataRes res { get; set; } + public string request_id { get; set; } + } + + public class SimpleTexOcrResponseDataRes + { + public string type { get; set; } + public SimpleTexOcrResponseDataInfo info { get; set; } + } + + public class SimpleTexOcrResponseDataInfo + { + public string markdown { get; set; } + } + + /// /// 请求参数 /// public class SimpleTexOcrRequest { + public SimpleTexOcrRequest(string filePath) + { + file = File.OpenRead(filePath); + } /// /// 合法的图片二进制文件信息,包括png/jpg等格式,无法开启批量调用,仅支持一次上传一张图片 /// @@ -36,7 +62,7 @@ namespace VideoAnalysisCore.Common /// /// 开启后,模型将基于0°,90°, 180°, 270°自动矫正上传图片的方向,默认不开启 /// - public bool enable_img_rot { get; set; }=false; + public bool enable_img_rot { get; set; } = false; /// /// 用于修改行内公式在markdown中的包裹符号。以Json形式填入,如果格式错误将使用默认的包裹符号 /// 示例:["$","$"] @@ -54,7 +80,7 @@ namespace VideoAnalysisCore.Common public class SimpleTexOcrResponse { public bool Success { get; set; } - public string Result { get; set; } + public SimpleTexOcrResponseData Result { get; set; } public string Error { get; set; } } @@ -62,7 +88,6 @@ namespace VideoAnalysisCore.Common public class SimpLetexClient { private readonly IHttpClientFactory _httpClientFactory; - private const string ApiUrl = "https://server.simpletex.cn/api/simpletex_ocr"; public SimpLetexClient( IHttpClientFactory httpClientFactory) { @@ -77,8 +102,7 @@ namespace VideoAnalysisCore.Common // 添加文件内容 var fileContent = new StreamContent(request.file); - fileContent.Headers.ContentType = new MediaTypeHeaderValue("image/jpeg"); - content.Add(fileContent, nameof(request.file)); + content.Add(fileContent, nameof(request.file), Path.GetFileName(request.file.Name)); // 添加并收集其他参数 if (request.rec_mode != "auto") @@ -105,17 +129,15 @@ namespace VideoAnalysisCore.Common } // 生成鉴权参数 - var randomStr = Guid.NewGuid().ToString().Take(16).ToString(); + var randomStr = 
Guid.NewGuid().ToString("N").Substring(16); var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString(); var appId = AppCommon.Config.SimpLetex.AppId; - // 添加鉴权参数到签名集合 - parameters["random-str"] = randomStr; parameters["timestamp"] = timestamp; + parameters["random-str"] = randomStr; parameters["app-id"] = appId; // 生成签名 - var signStr = string.Join("&", parameters .OrderBy(p => p.Key) .Select(p => $"{p.Key}={Uri.EscapeDataString(p.Value)}")) @@ -123,7 +145,8 @@ namespace VideoAnalysisCore.Common var sign = ComputeMD5(signStr); // 创建请求并添加Header - var requestMessage = new HttpRequestMessage(HttpMethod.Post, "api/simpletex_ocr") + var requestMessage = new HttpRequestMessage(HttpMethod.Post, + AppCommon.Config.SimpLetex.Host + "simpletex_ocr") { Content = content }; @@ -136,7 +159,7 @@ namespace VideoAnalysisCore.Common try { var response = await client.SendAsync(requestMessage); - var responseContent = await response.Content.ReadAsStringAsync(); + var responseContent = await response.Content.ReadFromJsonAsync(); return new SimpleTexOcrResponse { @@ -178,7 +201,7 @@ namespace VideoAnalysisCore.Common /// public static class SSimpLetexExtensions { - public static IServiceCollection AddSimpleTexOcrClient( this IServiceCollection services) + public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services) { services.AddSingleton(); return services; diff --git a/VideoAnalysisCore/Common/RedisExpand.cs b/VideoAnalysisCore/Common/RedisExpand.cs index b600c60..5443354 100644 --- a/VideoAnalysisCore/Common/RedisExpand.cs +++ b/VideoAnalysisCore/Common/RedisExpand.cs @@ -126,8 +126,7 @@ namespace VideoAnalysisCore.Common { foreach (var item in taskIds) tran.LPush(RedisExpandKey.ChannelKey, item); - object[] ret = tran.Exec(); - Console.WriteLine(ret[0] + ", " + ret[2]); + tran.Exec(); } } /// diff --git a/VideoAnalysisCore/Controllers/LJZK_Controller.cs b/VideoAnalysisCore/Controllers/LJZK_Controller.cs index c116291..0cc5a9f 100644 --- 
a/VideoAnalysisCore/Controllers/LJZK_Controller.cs +++ b/VideoAnalysisCore/Controllers/LJZK_Controller.cs @@ -103,7 +103,8 @@ namespace VideoAnalysisCore.Controllers if (videoIdArr.Contains(s.VideoCode)) continue; var pptInfo = req.AnalyzeItems //ȡppt videoCode - .FirstOrDefault(x => x.AttachmentsInfoType == AttachmentsInfoType.PPT && s.StructurePageContentId == x.StructurePageContentId); + .FirstOrDefault(x => x.AttachmentsInfoType == AttachmentsInfoType.PPT + && s.StructurePageContentId == x.StructurePageContentId); var pptCode = pptInfo?.VideoCode ?? (pptInfo?.VideoUrl??string.Empty); videos.Add(new VideoTask() { @@ -161,10 +162,10 @@ namespace VideoAnalysisCore.Controllers { if ( string.IsNullOrEmpty(tagId)) return BadRequest(); - - var taskId = int.Parse(tagId); + long taskId = 0; + var taskIdOK = long.TryParse(tagId,out taskId); var task = await videoTaskDB.AsQueryable() - .WhereIF(taskId != 0, s => s.Id == taskId || s.TagId == tagId || s.PPTVideoCode== tagId) + .Where(s=> s.TagId == tagId || s.PPTVideoCode== tagId || (taskIdOK&& s.Id == taskId)) .FirstAsync(); if (task is null) return BadRequest("Ч"); diff --git a/VideoAnalysisCore/Model/VideoQuestion.cs b/VideoAnalysisCore/Model/VideoQuestion.cs new file mode 100644 index 0000000..9fc66da --- /dev/null +++ b/VideoAnalysisCore/Model/VideoQuestion.cs @@ -0,0 +1,52 @@ +using SqlSugar; +using System.ComponentModel.DataAnnotations; +using System.Net; +using System.Text.Json; +using UserCenter.Model.Enum; +using VideoAnalysisCore.AICore.GPT.Dto; +using VideoAnalysisCore.AICore.SherpaOnnx; +using VideoAnalysisCore.Model.Enum; +using VideoAnalysisCore.Model.Interface; +using Whisper.net; + +namespace VideoAnalysisCore.Model +{ + /// + /// 视频片段试题 + /// + [SugarTable("videoquestion")] + public class VideoQuestion : IDB + { + /// + /// id + /// + [SugarColumn(IsPrimaryKey = true, IsIdentity = true)] + public long Id { get; set; } + /// + /// 视频任务id + /// + public long VideoTaskId { get; set; } + /// + /// 视频片段ID + /// 
隶属于 + /// + public long VideoKonwPoint { get; set; } + + /// + /// 知识点 + /// + public string? KnowPoint { get; set; } + /// + /// 知识点ID + /// + public string? KnowPointId { get; set; } + /// + /// 题干 + /// + public string? TopicStem { get; set; } + /// + /// 问题(设问) + /// + public string? Question { get; set; } + } +}