优化 字符串提取JSON函数

优化 AI分析流程
This commit is contained in:
小肥羊 2026-01-06 10:02:10 +08:00
parent ce5f549407
commit 89432c681f
8 changed files with 256 additions and 135 deletions

View File

@ -17,7 +17,7 @@ export interface VideoKnowRes {
startTime: number; startTime: number;
} }
export interface SenseVoiceRes { export interface SenseVoiceRes {
text: string; text: string;
start: number; start: number;
end: number; end: number;
@ -72,4 +72,4 @@ export const ErrorTaskList = (data: any) => {
return http.request<any>("post", "/api/VideoTask/ErrorTaskList", { return http.request<any>("post", "/api/VideoTask/ErrorTaskList", {
data data
}); });
}; };

View File

@ -44,7 +44,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param> /// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns> /// <returns></returns>
/// <exception cref="Exception"></exception> /// <exception cref="Exception"></exception>
public async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model = null, int max_tokens = 8000) public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model = null, int max_tokens = 16000)
{ {
Message[] messageArr = [ Message[] messageArr = [
new Message(postMessages,"user"), new Message(postMessages,"user"),
@ -56,7 +56,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
taskId = task, taskId = task,
model = model, model = model,
title = title, title = title,
max_tokens =8000, max_tokens = max_tokens,
stream = true, stream = true,
temperature = 0.2f, temperature = 0.2f,
messages = messageArr messages = messageArr

View File

@ -42,7 +42,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param> /// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns> /// <returns></returns>
/// <exception cref="Exception"></exception> /// <exception cref="Exception"></exception>
public async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 8000) public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 16000)
{ {
Message[] messageArr = [ Message[] messageArr = [
new Message(postMessages,"user"), new Message(postMessages,"user"),

View File

@ -127,9 +127,12 @@ namespace VideoAnalysisCore.AICore.GPT
messageBuilder.Append(str); messageBuilder.Append(str);
if (!string.IsNullOrEmpty(strReasoning)) if (!string.IsNullOrEmpty(strReasoning))
messageBuilder1.Append(strReasoning); messageBuilder1.Append(strReasoning);
var steamCount = messageBuilder.Length + messageBuilder1.Length; if (chatReq.title != "优化字幕")
if (++threshold % 30 == 0) {
redisManager.SetTaskProgress(chatReq.taskId, "steam=>" + steamCount); var steamCount = messageBuilder.Length + messageBuilder1.Length;
if (++threshold % 30 == 0)
redisManager.SetTaskProgress(chatReq.taskId, "steam=>" + steamCount);
}
} }
catch (Exception e) catch (Exception e)
{ {
@ -172,13 +175,14 @@ namespace VideoAnalysisCore.AICore.GPT
redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning }; redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning };
redisManager.SetTaskGPTCached(chatRep.taskId, time, redisCached); redisManager.SetTaskGPTCached(chatRep.taskId, time, redisCached);
} }
chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault();
chatResContent = chatResContent?.Replace("\n", ""); chatResContent = chatResContent?.Replace("\n", "");
chatResContent = chatResContent?.Replace("```json", ""); chatResContent = chatResContent?.Replace("```json", "");
chatResContent = chatResContent?.Replace("```", ""); chatResContent = chatResContent?.Replace("```", "");
chatResContent = chatResContent?.Replace("}{", "},{"); chatResContent = chatResContent?.Replace("}{", "},{");
chatResContent = chatResContent?.Replace("}|{", "},{"); chatResContent = chatResContent?.Replace("}|{", "},{");
chatResContent = chatResContent?.Trim(); chatResContent = chatResContent?.Trim();
chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault();
if (string.IsNullOrEmpty(chatResContent)) if (string.IsNullOrEmpty(chatResContent))
throw new Exception($"GPT返回结果无有效JSON =>{chatResp?.res}"); throw new Exception($"GPT返回结果无有效JSON =>{chatResp?.res}");
@ -251,5 +255,25 @@ namespace VideoAnalysisCore.AICore.GPT
} }
throw errorMSG.Last(s => s != null); throw errorMSG.Last(s => s != null);
} }
/// <summary>
/// Sends a prompt to the AI backend and returns the response as <typeparamref name="T"/>.
/// This base implementation is only a placeholder: it always throws, and is meant to be
/// replaced by an <c>override</c> in each concrete client.
/// </summary>
/// <typeparam name="T">Type the JSON response is returned as.</typeparam>
/// <param name="task">Task id.</param>
/// <param name="postMessages">Prompt text.</param>
/// <param name="title">Task type.</param>
/// <param name="model">GPT version, see <see cref="ChatGPTType"/>.</param>
/// <param name="max_tokens">Maximum token count; 16000 when not supplied.</param>
/// <returns>Never returns normally in this base implementation.</returns>
/// <exception cref="NotImplementedException">
/// Always thrown here. It derives from <see cref="Exception"/>, so callers that catch
/// <see cref="Exception"/> keep working.
/// </exception>
public virtual Task<T> ChatAsync<T>(string task, string postMessages, string title,
    string model = null, int max_tokens = 16000)
{
    // Thrown synchronously (not as a faulted task), same as before; only the exception
    // type is narrowed from the bare System.Exception to the idiomatic one.
    throw new NotImplementedException("需要实现");
}
} }
} }

View File

@ -81,21 +81,22 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var thems = questionRes.Adapt<VideoKnowQueryDto[]>().ToJson(); var thems = questionRes.Adapt<VideoKnowQueryDto[]>().ToJson();
var checkResFormat1 = """[{"StartTime":开始秒(number),"KnowPoint":知识点名称(string),"KnowPointId":知识点Id(string)}]"""; var checkResFormat1 = """[{"StartTime":开始秒(number),"KnowPoint":知识点名称(string),"KnowPointId":知识点Id(string)}]""";
var knowMessages = var knowMessages =
$"我针对{taskInfo.Subject}课堂授课视频分析出了视频的授课阶段片段。" + $"我针对{taskInfo.Subject}课堂授课视频分析出了视频的授课阶段片段。\n" +
$"现在需要你通过每个片段的内容总结来分配正确的知识点(单个片段允许多个知识点用逗号','分割)。" + $"现在需要你通过每个片段的内容总结来分配正确的知识点(单个片段允许多个知识点用逗号','分割)。\n" +
$"这是我的分段 {thems}。" + $"这是我的分段 {thems}。\n" +
$"课堂内容与{sections}章节相关" + $"课堂内容与{sections}章节相关\n" +
$"最后请确保分配的知识点是用户提供的,并且一定正确合理!" + $"最后请确保分配的知识点是用户提供的,并且一定正确合理!\n" +
$"返回的片段数量与传入片段数量一致!" + $"返回的片段数量与传入片段数量一致(硬性条件)!\n" +
$"输出内容只返回json格式({checkResFormat1})" + $"输出内容只返回json格式({checkResFormat1})\n" +
$" 格式 (方法点Id|方法点名称) " + $" 格式 (方法点Id|方法点名称) \n" +
$"提供的知识点名称({knows})。"; $"提供的`知识点名称({knows})。\n";
await redisManager.AddTaskLog(taskInfo.Id, "=>2.开始分析视频内容知识点"); await redisManager.AddTaskLog(taskInfo.Id, "=>2.开始分析视频内容知识点");
VideoKnowRes[] konwRes; VideoKnowRes[] konwRes;
var knowOK = false; var knowOK = false;
for (int i = 0; i < 4; i++) var chatClentArr = new GPTClient[] { chatGPTClient, geminiClient,deepSeekClient };
for (int i = 0; i < 3; i++)
{ {
konwRes = await chatGPTClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), knowMessages, "知识点"); konwRes = await chatClentArr[i].ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), knowMessages, "知识点");
// 分析结果的片段数量与预期不匹配 // 分析结果的片段数量与预期不匹配
if (questionRes.Length != konwRes.Length) continue; if (questionRes.Length != konwRes.Length) continue;
for (int xi = 0; xi < konwRes.Count(); xi++) for (int xi = 0; xi < konwRes.Count(); xi++)
@ -103,14 +104,12 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
knowOK = true; knowOK = true;
break; break;
} }
if (!knowOK) if (!knowOK)
{ {
await redisManager.AddTaskLog(taskInfo.Id,"GPT未能分析出有效的分段的知识点"); await redisManager.AddTaskLog(taskInfo.Id, "GPT未能分析出有效的分段的知识点");
throw new Exception("GPT未能分析出有效的分段的知识点"); throw new Exception("GPT未能分析出有效的分段的知识点");
} }
//todo 未包含的知识点片段 如何处理
return questionRes return questionRes
.Where(s => !string.IsNullOrEmpty(s.KnowPoint)) .Where(s => !string.IsNullOrEmpty(s.KnowPoint))
.SelectMany( .SelectMany(
@ -190,7 +189,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var pptFormat = taskInfo.VideoType == AttachmentsInfoType. var pptFormat = taskInfo.VideoType == AttachmentsInfoType.
? "这堂课是习题课,所讲解内容几乎都是试题。" ? "这堂课是习题课,所讲解内容几乎都是试题。"
: string.Empty; : string.Empty;
var checkResFormat = """{"Score":打分(number),"Evaluation":评价以及扣分原因(string)""";//,"Data":优化后的分段(array)}"""; var checkResFormat = """{"Score":打分(number),"Evaluation":扣分原因/改进建议(string)""";//,"Data":优化后的分段(array)}""";
var checkMessage = var checkMessage =
$""" $"""
@ -228,9 +227,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length); var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length);
var spanCount = 75; var spanCount = 75;
var totalCount = captionsArr.Length / spanCount + 1; var totalCount = captionsArr.Length / spanCount + 1;
await redisManager.AddTaskLog(taskInfo.Id, $"=>字幕优化");
var chatClentArr = new GPTClient[] { deepSeekClient,chatGPTClient, geminiClient };
await Parallel.ForAsync(0, totalCount, await Parallel.ForAsync(0, totalCount,
new ParallelOptions() { MaxDegreeOfParallelism =15 }, new ParallelOptions() { MaxDegreeOfParallelism = 1 },
async (s, c) => async (s, c) =>
{ {
var cArr = captionsArr var cArr = captionsArr
@ -242,24 +243,33 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var nowCaptionStr = cStrArr.ToJson(); var nowCaptionStr = cStrArr.ToJson();
var resFormat = """[string(修改结果)]"""; var resFormat = """[string(修改结果)]""";
var postMessages = var postMessages =
$"角色设定:你是一位专业的中国{subject}学科专家,负责校对关于{sections}内容的课堂教学字幕。\n\n" + $"角色设定:你是一位专业的中国{subject}学科专家,负责校对关于{sections}内容的课堂教学字幕。\n" +
$"任务描述:\n" + $"任务描述:\n" +
$"请根据上下文逻辑对输入的语音识别STT字幕进行深度优化。具体要求如下\n" + $"请根据上下文逻辑对输入的语音识别STT字幕进行深度优化。具体要求如下\n" +
$"1. 逻辑纠错:结合{subject}学科背景,利用前后文语义修正所有错误词汇。不仅要修正同音错别词(如:树列改为数列),还要修正因识别模糊导致的语义断裂或学科术语错误。\n" + $"1. 逻辑纠错:结合{subject}学科背景,利用前后文语义修正所有错误词汇。不仅要修正同音错别词(如:树列改为数列),还要修正因识别模糊导致的语义断裂或学科术语错误。\n" +
$"2. 断句与标点:优化字幕的标点符号,并根据老师说话的语感和学科逻辑重新调整断句位置。确保每一条字幕在学术表达上自然、通顺,修复由于语音停顿造成的断句不当。\n" + $"2. 断句与标点:优化字幕的标点符号,并根据老师说话的语感和学科逻辑重新调整断句位置。确保每一条字幕在学术表达上自然、通顺,修复由于语音停顿造成的断句不当。\n" +
$"3. 公式规范:将字幕中提到的数学或科学公式统一转化为规范的 LaTeX 格式。\n\n" + $"3. 公式规范:将字幕中提到的数学或科学公式统一转化为规范的 LaTeX 格式(使用$包裹公式,注意严格遵守Json格式的转义符号)。\n" +
$"强制约束:\n" + $"强制约束:\n" +
$"- 数量对齐输出的字幕条数Array Length必须与输入的字幕条数完全一致严禁合并、拆分或删除任何条目。\n" + $"- 数量对齐输出的字幕条数Array Length必须与输入的字幕条数完全一致严禁合并、拆分或删除任何条目。\n" +
$"- 纯净返回:只允许返回 JSON 格式的字符串,严禁包含任何前言、后缀或解释性文字。\n" + $"- 纯净返回:只允许返回 JSON 格式的字符串,严禁包含任何前言、后缀或解释性文字。\n" +
$"- 数据格式JSON 结构必须严格符合:{resFormat}\n\n" + $"- 数据格式JSON 结构必须严格符合:{resFormat}\n" +
$"待优化字幕内容:\n" + $"待优化字幕内容:\n" +
$"{nowCaptionStr}\n\n" + $"{nowCaptionStr}\n" +
$"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。"; $"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。";
var resData = await deepSeekClient.ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕"); string[]? resData = null;
for (int i = 0; i < 3; i++)
{
resData = await chatClentArr[i].ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕", ChatGPTType.Deepseek_Chat, 8000);
if (resData.Count() == cArr.Count())
break;
else
await redisManager.AddTaskLog(taskInfo.Id, $"=>字幕优化 分段{s} AI结果数量不匹配 重试{i}");
}
if (resData.Count() != cArr.Count()) if (resData.Count() != cArr.Count())
{ {
resData = cStrArr.ToArray(); resData = cStrArr.ToArray();
await redisManager.AddTaskLog(taskInfo.Id, $"=>字幕优化 分段{s} AI结果数量不匹配,采用原始值"); await redisManager.AddTaskLog(taskInfo.Id, $"=>字幕优化 分段{s} AI结果数量不匹配 采用原始值");
} }
newCaptionsList.AddRange(resData.Select((text, i) => new SenseVoiceRes() newCaptionsList.AddRange(resData.Select((text, i) => new SenseVoiceRes()
{ {
@ -290,33 +300,34 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{ {
try try
{ {
var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) || string.IsNullOrEmpty(taskInfo?.PPTKeyFrame) //分段超长问题,评分优化如何处理
? $"" var keyFrameStr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) || string.IsNullOrEmpty(taskInfo?.PPTKeyFrame)
: $"初步划分阶段授课PPT发生了变化的时间是{taskInfo.PPTKeyFrame},基于PPT变化时间点将字幕内容分割成时间段。每个时间段的起始和结束应接近这些时间点例如以时间点为中心扩展至内容自然过渡处。"; ? $"请分析授课中字幕描述的知识内容,然后基于视频整体知识点讲解提炼出不同的阶段以便对老师上课内容切片提取为知识库,所以请确保阶段的内容准确性"
: $"授课中老师的PPT在这些时间段内进行了切换{taskInfo.PPTKeyFrame},理应这些时间段内的讲述内容也发生了变化,请你基于PPT变化时间点结合字幕描述的知识内容提炼出不同的切片。每个阶段的起始和结束应接近这些时间点例如以时间点为中心扩展至内容自然过渡处。";
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]"""; var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]""";
var reviewStr = taskInfo?.VideoType == AttachmentsInfoType.
? $"但本堂课是习题课,所以大部分阶段是不同的例题讲解内容。\n"
: string.Empty;
var postMessages = string.Empty; var postMessages = string.Empty;
postMessages = postMessages =
$"请通过视频字幕内容分析出视频中课堂的授课阶段。" + $"请通过视频字幕内容分析出视频中课堂的授课知识点切片\n" +
$"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。" + $"阶段的细分程度到某个知识点的讲解/认识/例题/总结\n" +
$"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。" + $"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。\n" +
(taskInfo?.VideoType == AttachmentsInfoType. $"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。\n" +
? $"但本堂课是习题课,所以大部分阶段是不同的例题讲解内容。" reviewStr +
: string.Empty) + $"初步划分阶段:{keyFrameStr}\n" +
$"{keyFrameArr}" + $"内容分析:对每个时间段,提取主要讲解内容:识别关键词(如“例题”“证明”“练习”“总结”)和内容结构。\n" +
$"内容分析:对每个时间段,提取主要讲解内容:识别关键词(如“例题”“证明”“练习”“总结”)和内容结构。" + $"判断阶段类型:如果内容以解题为主,归类为“例题精讲”;如果涉及新知识讲解,归类为“新知讲解”;以此类推。\n" +
$"判断阶段类型:如果内容以解题为主,归类为“例题精讲”;如果涉及新知识讲解,归类为“新知讲解”;以此类推。" + $"内容总结简述该阶段的核心讲解内容70~200字,确保内容与阶段时间内授课内容符合。\n" +
$"内容总结简述该阶段的核心讲解内容70~200字,确保内容与阶段时间内授课内容符合。" + $"阶段主题:基于内容总结,提炼一个恰当的主题(例如,“柯西不等式的基本应用”)。\n" +
$"阶段主题:基于内容总结,提炼一个恰当的主题(例如,“柯西不等式的基本应用”)。" + $"输出要求:确保阶段划分合理、无` 重叠,且时长符合要求\n" +
$"输出要求:确保阶段划分合理、无重叠,且时长符合要求" + $"输出格式要求内容只返回json格式({resFormat})\n" +
$"输出格式要求内容只返回json格式({resFormat})" + $"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。\n" +
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" +
$"字幕列表 {captions.Captions} 字幕结束!"; $"字幕列表 {captions.Captions} 字幕结束!";
await redisManager.AddTaskLog(taskInfo.Id, $"开始分析视频内容 {tryCount}"); await redisManager.AddTaskLog(taskInfo.Id, $"开始分析视频内容 {tryCount}");
//return await chatGPTClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕"); //return await chatGPTClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
var res = await geminiClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕"); var res = await geminiClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
//var r2 = await chatClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕"); //var r2 = await chatClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
return res; return res;
} }
@ -328,7 +339,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
return null; return null;
} }
@ -402,7 +413,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
if (resData is null || resData.Count() == 0) if (resData is null || resData.Count() == 0)
break; break;
foreach (var q in resData) foreach (var q in resData)
{ {
var TopicId = YitIdHelper.NextId(); var TopicId = YitIdHelper.NextId();
foreach (var qt in q.QuestionArr) foreach (var qt in q.QuestionArr)
{ {
@ -442,7 +453,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
} }
} }
} }
insertData = insertData.GroupBy(s => string.Join("", Regex.Matches(s.StartTime+s.TopicStem+s.Question, "[\u4e00-\u9fa5a-zA-Z0-9]+"))) insertData = insertData.GroupBy(s => string.Join("", Regex.Matches(s.StartTime + s.TopicStem + s.Question, "[\u4e00-\u9fa5a-zA-Z0-9]+")))
.Select(s => s.First()).ToList(); .Select(s => s.First()).ToList();
if (insertData == null || insertData.Count == 0 || insertQuestionKonw.Count == 0) if (insertData == null || insertData.Count == 0 || insertQuestionKonw.Count == 0)
return null; return null;
@ -503,7 +514,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
} }
catch (Exception) catch (Exception)
{ {
throw new Exception("没有对应的子知识点=>" + sections+" "+ kInfo?.Name); throw new Exception("没有对应的子知识点=>" + sections + " " + kInfo?.Name);
} }
//AI优化字幕 //AI优化字幕
@ -523,17 +534,18 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);//ChatGPT var insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);//ChatGPT
//校验结果质量 //校验结果质量
var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id); var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id);
await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果 得分=>{checkRes.Score} ");
await redisManager.AddTaskLog(taskInfo.Id, checkRes.Evaluation);
if (checkRes != null && checkRes.Score >= 80) await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果 得分=>{checkRes.Score} ");
await redisManager.AddTaskLog(taskInfo.Id, checkRes.Evaluation);
if (checkRes != null && checkRes.Score >= 85)
{ {
//写入知识点 //写入知识点
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id); await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
await videoKonwPointDB.InsertRangeAsync(insertData); await videoKonwPointDB.InsertRangeAsync(insertData);
break; break;
}else }
else
await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果不合格!即将重试 剩余次数{tryCount}"); await redisManager.AddTaskLog(taskInfo.Id, $"=>课堂内容AI分析结果不合格!即将重试 剩余次数{tryCount}");
if (questionRes.Any(s => s.KeepTime < 30)) if (questionRes.Any(s => s.KeepTime < 30))
{ {
@ -596,7 +608,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var taskInfo = await videoTaskDB.CopyNew().AsQueryable() var taskInfo = await videoTaskDB.CopyNew().AsQueryable()
.Where(s => s.Id == taskId) .Where(s => s.Id == taskId)
.FirstAsync(); .FirstAsync();
if (taskInfo.VideoType != null&& taskInfo.VideoType!=AttachmentsInfoType.) if (taskInfo.VideoType != null && taskInfo.VideoType != AttachmentsInfoType.)
return; return;
var subject = taskInfo.Subject.ToString(); var subject = taskInfo.Subject.ToString();
var Course_Id = taskInfo.CourseId; var Course_Id = taskInfo.CourseId;

View File

@ -41,7 +41,8 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param> /// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns> /// <returns></returns>
/// <exception cref="Exception"></exception> /// <exception cref="Exception"></exception>
public async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model = null, int max_tokens = 8000) public override async Task<T> ChatAsync<T>(string task, string postMessages, string title,
string model = null, int max_tokens = 16000)
{ {
Message[] messageArr = [ Message[] messageArr = [
new Message(postMessages,"user"), new Message(postMessages,"user"),
@ -53,7 +54,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
taskId = task, taskId = task,
title=title, title=title,
model = model, model = model,
max_tokens =12000, max_tokens = max_tokens,
stream = true, stream = true,
temperature = 0.2f, temperature = 0.2f,
messages = messageArr messages = messageArr

View File

@ -63,8 +63,11 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-sense-voice-24-07-17", "tokens.txt"); config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-sense-voice-24-07-17", "tokens.txt");
//SenseVoice 模型 //SenseVoice 模型
config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-sense-voice-24-07-17", "model.onnx"); config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-sense-voice-24-07-17", "model.onnx");
//1 使用逆文本规范化处理感官语音 //1 使用逆文本规范化处理感官语音 [控制标点符号生成]
config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1; config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
//反转文本规范化规则 fst 的路径
//config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");
config.ModelConfig.SenseVoice.Language = "zh"; config.ModelConfig.SenseVoice.Language = "zh";
//模型类型 //模型类型
config.ModelConfig.ModelType = string.Empty; config.ModelConfig.ModelType = string.Empty;
@ -106,8 +109,6 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//} //}
#endregion #endregion
//反转文本规范化规则 fst 的路径
config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");
#if DEBUG #if DEBUG
config.ModelConfig.Debug = 1; config.ModelConfig.Debug = 1;
@ -162,7 +163,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
int numIter = numSamples / windowSize; int numIter = numSamples / windowSize;
var totalSecond = numSamples / (float)sampleRate; var totalSecond = numSamples / (float)sampleRate;
var res = new List<SenseVoiceRes>(500); var res = new List<SenseVoiceRes>(500);
using var VAD = new VoiceActivityDetector(VADModelConfig, 30); using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 20);
for (int i = 0; i != numIter; ++i) for (int i = 0; i != numIter; ++i)
{ {
int start = i * windowSize; int start = i * windowSize;

View File

@ -1,84 +1,167 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Text;
using System.Text.Json; using System.Text.Json;
using System.Text.RegularExpressions;
using static System.Runtime.InteropServices.JavaScript.JSType; using static System.Runtime.InteropServices.JavaScript.JSType;
namespace VideoAnalysisCore.Common namespace VideoAnalysisCore.Common
{ {
public static class JsonExtractor public static class JsonExtractor
{ {
/// <summary>
/// Repairs backslash sequences that are not legal JSON escapes by doubling the offending
/// backslash, so text containing LaTeX-like commands (e.g. \overrightarrow, \unit) can be
/// parsed as JSON.
/// </summary>
/// <remarks>
/// Backslashes are processed in consecutive runs. A run of even length is copied as is.
/// For a run of odd length the last backslash is checked against the character that
/// follows it; if that is not a legal JSON escape (or the input ends there) one extra
/// backslash is emitted. Note that a command whose first letter is itself a legal escape
/// character (b, f, n, r, t - for example \frac) is considered valid and left unchanged.
/// </remarks>
/// <param name="json">Text to repair; may be null or empty.</param>
/// <returns>The repaired text, or the input itself when it is null or empty.</returns>
public static string ToSafeJsonString(this string json)
{
    if (string.IsNullOrEmpty(json))
        return json;

    int length = json.Length;
    // Reserve roughly 10% headroom for the backslashes that may be inserted.
    var buffer = new StringBuilder(length + (length / 10));

    for (int pos = 0; pos < length;)
    {
        if (json[pos] != '\\')
        {
            buffer.Append(json[pos]);
            pos++;
            continue;
        }

        // Consume the whole run of consecutive backslashes.
        int runStart = pos;
        do
        {
            pos++;
        } while (pos < length && json[pos] == '\\');

        int slashes = pos - runStart;
        bool lastIsUnpaired = (slashes & 1) == 1;

        // An unpaired trailing backslash must introduce a legal escape; otherwise pad it.
        if (lastIsUnpaired && (pos >= length || !IsValidJsonEscape(json, pos)))
            slashes++;

        buffer.Append('\\', slashes);
    }

    return buffer.ToString();
}

/// <summary>
/// Tells whether the character at <paramref name="nextCharIndex"/> (the one right after a
/// backslash) forms a legal JSON escape: one of " \ / b f n r t, or u followed by exactly
/// four hexadecimal digits that all lie inside the string.
/// </summary>
private static bool IsValidJsonEscape(string text, int nextCharIndex)
{
    switch (text[nextCharIndex])
    {
        case '"':
        case '\\':
        case '/':
        case 'b':
        case 'f':
        case 'n':
        case 'r':
        case 't':
            return true;

        case 'u':
        {
            // The four hex digits occupy nextCharIndex+1 .. nextCharIndex+4.
            int lastHex = nextCharIndex + 4;
            if (lastHex >= text.Length)
                return false;

            for (int p = nextCharIndex + 1; p <= lastHex; p++)
            {
                char d = text[p];
                bool outsideHex = (d < '0' || d > '9')
                               && (d < 'a' || d > 'f')
                               && (d < 'A' || d > 'F');
                if (outsideHex)
                    return false;
            }
            return true;
        }

        default:
            return false;
    }
}
/// <summary> /// <summary>
/// 提取json字符串 /// 提取json字符串
/// </summary> /// </summary>
/// <param name="input"></param> /// <param name="input"></param>
/// <returns></returns> /// <returns></returns>
public static List<string> ExtractJsonStrings(this string input) public static List<string> ExtractJsonStrings(this string input)
{
var results = new List<string>();
if (string.IsNullOrWhiteSpace(input)) return results;
int braceCount = 0;
int bracketCount = 0;
int startIndex = -1;
bool inString = false;
bool isEscaped = false;
for (int i = 0; i<input.Length; i++)
{ {
char c = input[i]; input = input.ToSafeJsonString();
var results = new List<string>();
if (string.IsNullOrWhiteSpace(input)) return results;
// 1. 处理转义字符 (例如 \") int braceCount = 0;
if (isEscaped) int bracketCount = 0;
int startIndex = -1;
bool inString = false;
bool isEscaped = false;
for (int i = 0; i < input.Length; i++)
{ {
isEscaped = false; char c = input[i];
continue;
// 1. 处理转义字符 (例如 \")
if (isEscaped)
{
isEscaped = false;
continue;
}
if (c == '\\')
{
isEscaped = true;
continue;
}
// 2. 处理字符串边界
if (c == '"')
{
inString = !inString;
continue;
}
// 3. 如果在字符串内,忽略括号逻辑
if (inString) continue;
// 4. 处理 JSON 对象和数组的开始
if (c == '{' || c == '[')
{
if (braceCount == 0 && bracketCount == 0)
{
startIndex = i;
}
if (c == '{') braceCount++;
else bracketCount++;
}
// 5. 处理 JSON 对象和数组的结束
else if (c == '}' || c == ']')
{
if (c == '}') braceCount--;
else bracketCount--;
if (braceCount == 0 && bracketCount == 0 && startIndex != -1)
{
string potentialJson = input.Substring(startIndex, i - startIndex + 1);
if (IsValidJson(potentialJson))
{
results.Add(potentialJson);
}
startIndex = -1;
}
}
} }
return results;
if (c == '\\')
{
isEscaped = true;
continue;
}
// 2. 处理字符串边界
if (c == '"')
{
inString = !inString;
continue;
}
// 3. 如果在字符串内,忽略括号逻辑
if (inString) continue;
// 4. 处理 JSON 对象和数组的开始
if (c == '{' || c == '[')
{
if (braceCount == 0 && bracketCount == 0)
{
startIndex = i;
}
if (c == '{') braceCount++;
else bracketCount++;
}
// 5. 处理 JSON 对象和数组的结束
else if (c == '}' || c == ']')
{
if (c == '}') braceCount--;
else bracketCount--;
if (braceCount == 0 && bracketCount == 0 && startIndex != -1)
{
string potentialJson = input.Substring(startIndex, i - startIndex + 1);
if (IsValidJson(potentialJson))
{
results.Add(potentialJson);
} }
startIndex = -1;
}
}
}
return results;
}
public static bool IsValidJson(string candidate) public static bool IsValidJson(string candidate)
{ {
@ -89,7 +172,7 @@ else if (c == '}' || c == ']')
JsonDocument.Parse(candidate); JsonDocument.Parse(candidate);
return true; return true;
} }
catch catch( Exception e)
{ {
return false; return false;
} }