From 02518a1c4f5d08f4356f24ff48f7a47b8cc6c264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E8=82=A5=E7=BE=8A?= <1048382248@qq.com> Date: Wed, 4 Jun 2025 17:44:33 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=20=E5=A4=8D=E4=B9=A0?= =?UTF-8?q?=E8=AF=BE=E5=88=87=E9=A2=98=E6=8F=90=E7=A4=BA=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AICore/GPT/DeepSeek/DeepSeekClient.cs | 3 +- .../AICore/GPT/DeepSeek/DeepSeekModel.cs | 2 +- .../AICore/GPT/DeepSeek/DeepSeek_GPT.cs | 54 +++++---- VideoAnalysisCore/Common/AppCommon.cs | 44 -------- VideoAnalysisCore/Common/AppConfig.cs | 1 + .../Common/Expand/AlibabaCloudVodExpand.cs | 4 +- .../Common/Expand/AliyunOSSExpand.cs | 10 +- .../Common/Expand/SimpLetexExpand.cs | 27 ++++- VideoAnalysisCore/Common/JsonExtractor.cs | 105 ++++++++++++++++++ 9 files changed, 174 insertions(+), 76 deletions(-) create mode 100644 VideoAnalysisCore/Common/JsonExtractor.cs diff --git a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekClient.cs b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekClient.cs index eb13cb4..978e454 100644 --- a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekClient.cs +++ b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekClient.cs @@ -154,7 +154,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek var messageBuilder1 = new StringBuilder(); var lastChat = new ChatResSSE(); var splitCount = "data:".Length; - var maxLoop = 60*1000; + var maxLoop = 60*10000; int threshold = 0; while (maxLoop>0) { @@ -205,6 +205,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek } } } + Console.WriteLine(DateTime.Now + "=>AI请求超时 " + chatReq.taskId); return null; } diff --git a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekModel.cs b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekModel.cs index 3460ed1..2e4b41c 100644 --- a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekModel.cs +++ b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeekModel.cs @@ -38,7 +38,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek /// 一种替代温度采样的方法,称为原子核采样, 其中,模型考虑具有top_p概率的标记的结果 质量。所以 0.1 表示仅包含前 10% 概率质量的代币 被考 /// 建议与联动 /// - public float top_p { get; set; } = 0.5f; + public float top_p { get; set; } = 0.1f; /// /// 一个对象,用于指定模型必须输出的格式。设置为 enable 结构化输出,确保模型与您提供的 JSON 匹配 图式。 /// diff --git a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs index e257259..686461b 100644 --- a/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs +++ b/VideoAnalysisCore/AICore/GPT/DeepSeek/DeepSeek_GPT.cs @@ -18,6 +18,7 @@ using static System.Collections.Specialized.BitVector32; using FFmpeg.NET.Services; using Aliyun.OSS; using Yitter.IdGenerator; +using VideoAnalysisCore.Common.Expand; namespace VideoAnalysisCore.AICore.GPT.DeepSeek { @@ -172,10 +173,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek { //校验结果质量 var thems = JsonSerializer.Serialize(questionRes.Adapt()); + var pptFormat = taskInfo.VideoType==AttachmentsInfoType.PPT + ? "这堂课是习题课,所讲解内容都是试题。" + : string.Empty; var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}"""; var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" + - $"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" + + $"检查这些分段的时间是否合理 与相邻的时间段间隔是否大于30秒?" + $"分段的主题内容,知识点分配是否合理符合实际吗?" + + $"{pptFormat}" + $"请给出你的打分(0-100,70分及格)以及打分原因。" + $"这是我的分段 {thems}。" + $"后续的内容是包含时间戳的视频字幕的固定格式文本。" + @@ -257,33 +262,39 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek TotalCaptionsDto captions, string sections) { var tryCount = 10; - while (tryCount-->10) + while (tryCount-->0) { try { var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) ? string.Empty - : $"视频授课内容发生了变化的时间节点{taskInfo.PPTKeyFrame},授课阶段应当在附近时间发生变化。"; + : $"图像视频中授课内容PPT发生了变化的时间节点是{taskInfo.PPTKeyFrame},授课阶段结果可以参考这些时间节点。"; var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]"""; var exerciseClass = taskInfo?.VideoType == AttachmentsInfoType.Review - ? $"本堂课是习题课,每个阶段因当是不同得例题讲解片段,并且所有的授课阶段都视为例题精讲。" - : $"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。" + - $"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" + - $"通过生成的内容总结分析出对应的授课阶段主题。 "; + ? $"但是本堂课是习题课,所以每个阶段是不同的例题讲解内容。" + : string.Empty; //$"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。"; var postMessages = - $"请通过视频字幕内容分析出视频中{taskInfo.Subject}课堂的授课阶段。" + - $"课堂内容与{sections}章节相关。" + + $"请通过视频字幕内容分析出视频中课堂的授课阶段。" + + $"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。" + $"{keyFrameArr}" + + $"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。"+ $"{exerciseClass}" + - $"最后请检查每个授课阶段的时长,不允许出现超出800秒或者低于50秒的授课阶段。" + + $"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" + + $"通过生成的内容总结分析出对应的授课阶段主题。 "+ + $"最后请检查每个授课阶段的时长,不允许出现低于50秒的阶段。" + $"输出内容只返回json格式({resFormat})" + $"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" + $"字幕列表 {captions.Captions} 字幕结束!"; Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id.ToString()}.开始分析视频内容 {tryCount}"); - questionRes.AddRange(await ChatAsync(taskInfo.Id.ToString(), postMessages, "分析字幕")); + var resData = await ChatAsync(taskInfo.Id.ToString(), postMessages, "分析字幕"); + if (taskInfo?.VideoType == AttachmentsInfoType.Review) + foreach (var item in resData) + item.Stage = StageEnum.例题精讲.ToString(); + questionRes.AddRange(resData); + break; } catch (Exception ex) { @@ -305,6 +316,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek { taskId = task, model = model, + max_tokens= model== "deepseek-reasoner"?16000:8000, stream = true, temperature = 0.2f, messages = messageArr @@ -325,13 +337,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning }; RedisExpand.SetTaskGPTCached(task, time, redisCached); } - chatResContent = chatResContent?.Replace("字幕内容", "课堂情况"); + chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault(); chatResContent = chatResContent?.Replace("\n", ""); chatResContent = chatResContent?.Replace("```json", ""); chatResContent = chatResContent?.Replace("```", ""); chatResContent = chatResContent?.Replace("}{", "},{"); chatResContent = chatResContent?.Replace("}|{", "},{"); - chatResContent = chatResContent?.Trim().ExtractJson().FirstOrDefault(); + chatResContent = chatResContent?.Trim(); var startsStr = typeof(T).IsArray ? "[" : "{"; var endStr = typeof(T).IsArray ? "]" : "}"; @@ -393,17 +405,21 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek .ProcessImageAsync(new SimpleTexOcrRequest(filePath)); if (!sRes.Success) continue; - var knowArr=string.Join(',', knowInfoArr.Select(s => s.KnowPointId + "|" + s.KnowPoint )); + if(sRes.Result.res.value.Trim().Length<10)//总试题内容长度小于10 视为无效题目 + break; + Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.First().StartTime}秒试题的试题内容"); + Console.WriteLine( sRes.Result.res.value); + var knowArr=JsonSerializer.Serialize(knowInfoArr.Select(s => new { s.KnowPointId, s.KnowPoint })); var resFormat = """[{"TopicStem":string(试题题干),"Question:string(问题)","KnowPointId":(string)知识点ID}]"""; var postMessages = - $"提供一段内容是md格式的试题内容字符串。" + - $"请提取出其中的试题内容。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" + - $"知识点格式(知识点ID|知识点名称)范围[{knowArr}]。" + + $"我将提供一段内容是md格式的试题内容字符串。" + + $"请提取出其中的试题内容以及每个试题的问题。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" + + $"知识点格式(json格式)范围[{knowArr}]。" + $"排除不是试题内容的文字,优化公式排版并且去除题号。" + - $"如果存在多道大题,请帮忙拆分开!" + + $"如果存在多道题(或者小问),则需要拆分成为多个试题对象!" + $"输出内容只返回json格式为({resFormat})" + $"以下是试题内容" + - $"`{sRes.Result.res.info.markdown}`"; + $"`{sRes.Result.res.value}`"; var resData = await ChatAsync(taskInfo.Id.ToString(), postMessages, "提取试题", "deepseek-chat"); foreach (var q in resData) { diff --git a/VideoAnalysisCore/Common/AppCommon.cs b/VideoAnalysisCore/Common/AppCommon.cs index bbf2d48..574601e 100644 --- a/VideoAnalysisCore/Common/AppCommon.cs +++ b/VideoAnalysisCore/Common/AppCommon.cs @@ -102,50 +102,6 @@ namespace VideoAnalysisCore.Common return Path.Combine(task.Id.ToString().LocalPath(), $"{FrameName}{(fTime / 5).ToString().PadLeft(3,'0')}.jpg"); } - /// - /// 识别字符串中的json字符串 - /// - /// - /// - public static List ExtractJson(this string input) - { - List jsonList = new List(); - int index = 0; - - while (index < input.Length) - { - if (input[index] == '{' || input[index] == '[') - { - int startIndex = index; - int openCount = 1; - index++; - - while (index < input.Length && openCount > 0) - { - if (input[index] == '{' || input[index] == '[') - { - openCount++; - } - else if (input[index] == '}' || input[index] == ']') - { - openCount--; - } - index++; - } - - if (openCount == 0) - { - string json = input.Substring(startIndex, index - startIndex); - jsonList.Add(json); - } - } - else - { - index++; - } - } - return jsonList; - } /// /// 处理数学公式 /// diff --git a/VideoAnalysisCore/Common/AppConfig.cs b/VideoAnalysisCore/Common/AppConfig.cs index acbca26..bcb3530 100644 --- a/VideoAnalysisCore/Common/AppConfig.cs +++ b/VideoAnalysisCore/Common/AppConfig.cs @@ -4,6 +4,7 @@ using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; +using VideoAnalysisCore.Common.Expand; namespace VideoAnalysisCore.Common { diff --git a/VideoAnalysisCore/Common/Expand/AlibabaCloudVodExpand.cs b/VideoAnalysisCore/Common/Expand/AlibabaCloudVodExpand.cs index 47bff00..07053ce 100644 --- a/VideoAnalysisCore/Common/Expand/AlibabaCloudVodExpand.cs +++ b/VideoAnalysisCore/Common/Expand/AlibabaCloudVodExpand.cs @@ -10,9 +10,9 @@ using AlibabaCloud.TeaUtil.Models; using Microsoft.Extensions.DependencyInjection; using VideoAnalysisCore.Job; -namespace VideoAnalysisCore.Common +namespace VideoAnalysisCore.Common.Expand { - public class AlibabaCloudVodConfig + public class AlibabaCloudVodConfig { /// /// id diff --git a/VideoAnalysisCore/Common/Expand/AliyunOSSExpand.cs b/VideoAnalysisCore/Common/Expand/AliyunOSSExpand.cs index 84355ae..74c4965 100644 --- a/VideoAnalysisCore/Common/Expand/AliyunOSSExpand.cs +++ b/VideoAnalysisCore/Common/Expand/AliyunOSSExpand.cs @@ -18,7 +18,7 @@ using Aliyun.Credentials.Models; using VideoAnalysisCore.Model.Dto; using System.IO; -namespace VideoAnalysisCore.Common +namespace VideoAnalysisCore.Common.Expand { public class AliyunOSSConfig { @@ -65,13 +65,13 @@ namespace VideoAnalysisCore.Common // 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。 AccessKeySecret = AppCommon.Config.AliyunOSS.AccessKeySecret, Endpoint = AppCommon.Config.AliyunOSS.Endpoint, - Region= AppCommon.Config.AliyunOSS.Region, + Region = AppCommon.Config.AliyunOSS.Region, };// 创建ClientConfiguration实例,按照您的需要修改默认参数。 var conf = new ClientConfiguration(); // 设置v4签名。 conf.SignatureVersion = SignatureVersion.V4; // 创建OssClient实例。 - var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf); + var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf); oss.SetRegion(config.Region); service.AddSingleton(oss); @@ -81,7 +81,7 @@ namespace VideoAnalysisCore.Common /// /// /// 视频实体片段 - public static void AddVideoQuestionUrl(this OssClient oss, List fileArr ) + public static void AddVideoQuestionUrl(this OssClient oss, List fileArr) { var cached = new HashSet(); foreach (var item in fileArr) @@ -89,7 +89,7 @@ namespace VideoAnalysisCore.Common try { var path = item.VideoTaskId.ToString() + "/" + Path.GetFileName(item.FilePath); - if (cached.Contains(item.FilePath)) + if (cached.Contains(item.FilePath)) { item.ImageUrl = AppCommon.Config.AliyunOSS.BucketDomain + "/" + path; continue; diff --git a/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs b/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs index 08685bb..a713638 100644 --- a/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs +++ b/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs @@ -7,6 +7,8 @@ using System.Net.Http.Json; using System.Net.Sockets; using System.Security.Cryptography; using System.Text; +using System.Text.Json.Serialization; +using System.Text.Json; using System.Threading.Tasks; using AlibabaCloud.OpenApiClient.Models; using AlibabaCloud.SDK.Vod20170321; @@ -15,10 +17,9 @@ using AlibabaCloud.TeaUtil.Models; using Azure; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Options; -using Newtonsoft.Json; using VideoAnalysisCore.Job; -namespace VideoAnalysisCore.Common +namespace VideoAnalysisCore.Common.Expand { @@ -32,7 +33,10 @@ namespace VideoAnalysisCore.Common public class SimpleTexOcrResponseDataRes { public string type { get; set; } - public SimpleTexOcrResponseDataInfo info { get; set; } + + [JsonPropertyName("info")] // 替换为实际字段名 + public JsonElement DataInfo { get; set; } // 使用JsonElement接收未知类型 + public string value { get; set; } } public class SimpleTexOcrResponseDataInfo @@ -81,6 +85,7 @@ namespace VideoAnalysisCore.Common { public bool Success { get; set; } public SimpleTexOcrResponseData Result { get; set; } + public string ResultStr { get; set; } public string Error { get; set; } } @@ -123,7 +128,7 @@ namespace VideoAnalysisCore.Common if (request.isolated_formula_wrapper != null) { - var isolatedWrapper = JsonConvert.SerializeObject(request.isolated_formula_wrapper); + var isolatedWrapper = JsonSerializer.Serialize(request.isolated_formula_wrapper); content.Add(new StringContent(isolatedWrapper), nameof(request.isolated_formula_wrapper)); parameters[nameof(request.isolated_formula_wrapper)] = isolatedWrapper; } @@ -161,11 +166,25 @@ namespace VideoAnalysisCore.Common var response = await client.SendAsync(requestMessage); var resStr = await response.Content.ReadAsStringAsync(); var responseContent = await response.Content.ReadFromJsonAsync(); + if (responseContent.res.DataInfo.ValueKind == JsonValueKind.Object) + { + responseContent.res.value = JsonSerializer.Deserialize( + responseContent.res.DataInfo.GetRawText(), + new JsonSerializerOptions { PropertyNameCaseInsensitive = true } + )?.markdown??string.Empty; + // 处理字符串 + } + else if (responseContent.res.DataInfo.ValueKind == JsonValueKind.String) + { + responseContent.res.value = responseContent.res.DataInfo.GetString(); + } + request.file.Dispose(); return new SimpleTexOcrResponse { Success = response.IsSuccessStatusCode, Result = responseContent, + ResultStr= resStr, Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}" }; } diff --git a/VideoAnalysisCore/Common/JsonExtractor.cs b/VideoAnalysisCore/Common/JsonExtractor.cs new file mode 100644 index 0000000..4030e0f --- /dev/null +++ b/VideoAnalysisCore/Common/JsonExtractor.cs @@ -0,0 +1,105 @@ +using System; +using System.Collections.Generic; +using System.Text.Json; + +namespace VideoAnalysisCore.Common +{ + public static class JsonExtractor + { + /// + /// 提取json字符串 + /// + /// + /// + public static List ExtractJsonStrings(this string input) + { + List jsonList = new List(); + int index = 0; + while (index < input.Length) + { + if (input[index] == '{' || input[index] == '[') + { + int? endIndex = FindMatchingBracket(input, index); + if (endIndex.HasValue) + { + string candidate = input.Substring(index, endIndex.Value - index + 1); + if (IsValidJson(candidate)) + { + jsonList.Add(candidate); + index = endIndex.Value + 1; + continue; + } + } + } + index++; + } + return jsonList; + } + + private static int? FindMatchingBracket(string str, int start) + { + Stack stack = new Stack(); + bool inString = false; + bool inEscape = false; + + for (int i = start; i < str.Length; i++) + { + char c = str[i]; + + if (inEscape) + { + inEscape = false; + } + else if (inString) + { + if (c == '\\') + inEscape = true; + else if (c == '"') + inString = false; + } + else + { + switch (c) + { + case '{': + case '[': + stack.Push(c); + break; + case '}': + if (stack.Count == 0 || stack.Peek() != '{') + return null; + stack.Pop(); + break; + case ']': + if (stack.Count == 0 || stack.Peek() != '[') + return null; + stack.Pop(); + break; + case '"': + inString = true; + break; + } + } + + if (stack.Count == 0) + return i; + } + return null; // 括号未完全匹配 + } + + public static bool IsValidJson(string candidate) + { + if (string.IsNullOrEmpty(candidate)) + return false; + try + { + JsonDocument.Parse(candidate); + return true; + } + catch (Exception) + { + return false; + } + } + } +} \ No newline at end of file