完善 0122AI视频分析工作流的调试

This commit is contained in:
小肥羊 2026-01-21 10:30:38 +08:00
parent a2d14487cb
commit aecfa4ac0d
10 changed files with 226 additions and 150 deletions

View File

@ -44,7 +44,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param> /// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns> /// <returns></returns>
/// <exception cref="Exception"></exception> /// <exception cref="Exception"></exception>
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model = null, int max_tokens = 16000) public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model = null, int max_tokens = 32000)
{ {
Message[] messageArr = [ Message[] messageArr = [
new Message(postMessages,"user"), new Message(postMessages,"user"),

View File

@ -8,16 +8,15 @@ namespace VideoAnalysisCore.AICore.GPT
{ {
public class ChatGPTType public class ChatGPTType
{ {
public static string GPT5_mini = "gpt-5-mini-2025-08-07"; public const string GPT5_mini = "gpt-5-mini";
public static string GPT5 = "gpt-5-2025-08-07"; public const string GPT5 = "gpt-5-2025-08-07";
public static string GPT5_nano = "gpt-5-nano-2025-08-07";
public static string Deepseek_Reasoner = "deepseek-reasoner"; public const string Deepseek_Reasoner = "deepseek-reasoner";
public static string Deepseek_Chat = "deepseek-chat"; public const string Deepseek_Chat = "deepseek-chat";
public static string Gemini_3_Chat_thinking = "gemini-3-pro-preview-thinking"; public const string Gemini_3_Chat_thinking = "gemini-3-pro-preview-thinking";
public static string Gemini_3_Chat = "gemini-3-pro-preview"; public const string Gemini_3_Chat = "gemini-3-pro-preview";
} }

View File

@ -42,17 +42,19 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param> /// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns> /// <returns></returns>
/// <exception cref="Exception"></exception> /// <exception cref="Exception"></exception>
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 32000) public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model = ChatGPTType.Deepseek_Chat, int max_tokens = 8000)
{ {
Message[] messageArr = [ Message[] messageArr = [
new Message(postMessages,"user"), new Message(postMessages,"user"),
]; ];
messageArr = messageArr.Where(s => s != null).ToArray(); messageArr = messageArr.Where(s => s != null).ToArray();
if (max_tokens > 8000 &&(model is null || model == ChatGPTType.Deepseek_Chat))
max_tokens = 8000;
var chatReq = new ChatRequest var chatReq = new ChatRequest
{ {
taskId = task, taskId = task,
title = title, title = title,
model = model ?? ChatGPTType.Deepseek_Reasoner, model = model ?? ChatGPTType.Deepseek_Chat,
max_tokens = model == ChatGPTType.Deepseek_Reasoner ? 32000 : max_tokens, max_tokens = model == ChatGPTType.Deepseek_Reasoner ? 32000 : max_tokens,
stream = true, stream = true,
temperature = 0.2f, temperature = 0.2f,

View File

@ -23,6 +23,8 @@ using UserCenter.Model.Enum;
using Dm.filter; using Dm.filter;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Diagnostics; using System.Diagnostics;
using Dm.util;
using static System.Net.Mime.MediaTypeNames;
namespace VideoAnalysisCore.AICore.GPT.DeepSeek namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{ {
@ -88,7 +90,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{taskInfo.Subject} {taskInfo.Subject}
- TextbookSource/// - TextbookSource( PPT)PPT///
- KnowPoints - KnowPoints
- KnowPoint - KnowPoint
- KnowPointIdID KnowPoint - KnowPointIdID KnowPoint
@ -142,7 +144,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
EndTime = s.EndTime, EndTime = s.EndTime,
StageId = StageId, StageId = StageId,
KnowPoint = x.KnowPoint, KnowPoint = x.KnowPoint,
KnowPointWeight=x.KnowPointWeight, KnowPointWeight = x.KnowPointWeight,
TextbookSource = s.TextbookSource, TextbookSource = s.TextbookSource,
KnowSource = x.KnowSource, KnowSource = x.KnowSource,
KnowPointId = knowDic[x.KnowPoint].ToString(), KnowPointId = knowDic[x.KnowPoint].ToString(),
@ -213,17 +215,21 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var checkMessage = var checkMessage =
$""" $"""
{pptFormat} {pptFormat}
{sections} {sections}
Theme/Conten匹配,() Theme/Conten匹配,()
Conten有关联() Conten有关联()
:
1.
2.
3.
0-10070,, 0-10070,,
MinusScore: MinusScore:
Suggestion: () Suggestion:
{thems} {thems}
:::|{captions.Captions} :::|{captions.Captions}
@ -254,7 +260,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var message = var message =
$""" $"""
使 使
{pptFormat} {pptFormat}
{sections} {sections}
@ -265,7 +271,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
5) JSON 5) JSON
{thems} {thems}
{suggestion} {suggestion}
:::|{captions.Captions} :::|{captions.Captions}
JSON{resFormat} JSON{resFormat}
"""; """;
@ -284,7 +290,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
private async Task<SenseVoiceRes[]> OptimizeSubtitles(VideoTask taskInfo, private async Task<SenseVoiceRes[]> OptimizeSubtitles(VideoTask taskInfo,
SenseVoiceRes[] captionsArr, string sections) SenseVoiceRes[] captionsArr, string sections)
{ {
if (!string.IsNullOrEmpty(taskInfo.CaptionsAI) && taskInfo.CaptionsAI!="[]") if (!string.IsNullOrEmpty(taskInfo.CaptionsAI) && taskInfo.CaptionsAI != "[]")
return JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.CaptionsAI); return JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.CaptionsAI);
var subject = taskInfo.Subject.ToString(); var subject = taskInfo.Subject.ToString();
var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length); var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length);
@ -292,9 +298,21 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var totalCount = captionsArr.Length / spanCount + 1; var totalCount = captionsArr.Length / spanCount + 1;
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化"); await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化");
var chatClentArr = new GPTClient[] { deepSeekClient, chatGPTClient, geminiClient }; Func<string, Task<List<SenseVoiceInput>>>[] chatClentArr =
[
async (string m)=>await deepSeekClient
.ChatAsync<List<SenseVoiceInput>>(taskInfo.Id.ToString(), m, "优化字幕",ChatGPTType.Deepseek_Chat,8_000),
async (string m)=>await chatGPTClient
.ChatAsync<List<SenseVoiceInput>>(taskInfo.Id.ToString(), m, "优化字幕",ChatGPTType.GPT5,16_000),
async (string m)=>await geminiClient
.ChatAsync<List<SenseVoiceInput>>(taskInfo.Id.ToString(), m, "优化字幕",ChatGPTType.Gemini_3_Chat,16_000), ];
await Parallel.ForAsync(0, totalCount, await Parallel.ForAsync(0, totalCount,
new ParallelOptions() { MaxDegreeOfParallelism = 1 }, new ParallelOptions()
#if DEBUG
{ MaxDegreeOfParallelism = 1 },
#else
{ MaxDegreeOfParallelism = 9 },
#endif
async (s, c) => async (s, c) =>
{ {
var cArr = captionsArr var cArr = captionsArr
@ -302,43 +320,63 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
.Take(spanCount); .Take(spanCount);
if (cArr.Count() == 0) if (cArr.Count() == 0)
return; return;
var cStrArr = cArr.Select(s => s.Text); var cStrArr = cArr.Adapt<SenseVoiceInput[]>();
var nowCaptionStr = cStrArr.ToJson(); var nowCaptionStr = cStrArr.ToJson();
var resFormat = """[string(修改结果)]"""; var resFormat = """[{"t":时间(number),"r"字幕(string)}]""";
var postMessages = var postMessages =
$"角色设定:你是一位专业的中国{subject}学科专家,负责校对关于{sections}内容的课堂教学字幕。\n" + $$"""
$"任务描述:\n" +
$"请根据上下文逻辑对输入的语音识别STT字幕进行深度优化。具体要求如下\n" + {{subject}}{{sections}}STT稿
$"1. 逻辑纠错:结合{subject}学科背景,利用前后文语义修正所有错误词汇。不仅要修正同音错别词(如:树列改为数列),还要修正因识别模糊导致的语义断裂或学科术语错误。\n" +
$"2. 断句与标点:优化字幕的标点符号,并根据老师说话的语感和学科逻辑重新调整断句位置。确保每一条字幕在学术表达上自然、通顺,修复由于语音停顿造成的断句不当。\n" + JSON
$"3. 公式规范:将字幕中提到的数学或科学公式统一转化为规范的 LaTeX 格式(使用$包裹公式,注意严格遵守Json格式的转义符号)。\n" + 1. {{subject}},{{sections}}
$"强制约束:\n" +
$"- 数量对齐输出的字幕条数Array Length必须与输入的字幕条数完全一致严禁合并、拆分或删除任何条目。\n" + 使
$"- 纯净返回:只允许返回 JSON 格式的字符串,严禁包含任何前言、后缀或解释性文字。\n" + 2.
$"- 数据格式JSON 结构必须严格符合:{resFormat}\n" + ""
$"待优化字幕内容:\n" + 3. 使
$"{nowCaptionStr}\n" +
$"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。"; 4. / LaTeX JSON LaTeX $\\\\frac{a}{b}$使$"。
List<string>? resData = null;
for (int i = 0; i < 3; i++) JSON {{cStrArr.Count()}}
JSON JSON ```json Markdown
{{nowCaptionStr}}
1. JSON {{cStrArr.Count()}}
2. N N
3.
4. 使 "",
5. JSON JSON Markdown
t:
r:
t:t
r:
""";
List<SenseVoiceInput>? resData = null;
for (int i = 0; i < 6; i++)
{ {
resData = await chatClentArr[i].ChatAsync<List<string>>(taskInfo.Id.ToString(), postMessages, "优化字幕", ChatGPTType.Deepseek_Chat, 8000); resData = await chatClentArr[0](postMessages);
if (resData.Count() == cArr.Count()) //var cc = resData.Select(s => s.Start); 检查差异化
//var ccRes = cArr.Where(x => !cc.Contains(x.Start));
if (cArr.Count() - resData.Count() < 5)
break; break;
else else
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化 分段{s} AI结果数量不匹配 重试{i}"); await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化 分段{s} AI结果数量不匹配 重试{i} 剩余{captionsArr.Length - (decimal)newCaptionsList.Count}条字幕");
} }
if (cArr.Count() - resData.Count() > 5)
if (resData.Count() != cArr.Count())
{ {
resData = cStrArr.ToList(); resData = cStrArr.ToList();
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化 分段{s} AI结果数量不匹配 采用原始值"); await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化 分段{s} AI结果数量不匹配 采用原始值");
} }
newCaptionsList.AddRange(resData.Select((text, i) => new SenseVoiceRes() newCaptionsList.AddRange(resData.Select((el, i) => new SenseVoiceRes()
{ {
Start = captionsArr[spanCount * s + i].Start, Start = el.Start,
End = captionsArr[spanCount * s + i].End, Text = el.Text,
Text = text,
})); }));
return; return;
}); });
@ -367,31 +405,33 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var keyFrameStr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) || string.IsNullOrEmpty(taskInfo?.PPTKeyFrame) var keyFrameStr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) || string.IsNullOrEmpty(taskInfo?.PPTKeyFrame)
? $"请分析授课中字幕描述的知识内容,然后基于视频整体知识点讲解提炼出不同的阶段以便对老师上课内容切片提取为知识库,所以请确保阶段的内容准确性" ? $"请分析授课中字幕描述的知识内容,然后基于视频整体知识点讲解提炼出不同的阶段以便对老师上课内容切片提取为知识库,所以请确保阶段的内容准确性"
: $"授课中老师的PPT在这些时间段内进行了切换{taskInfo.PPTKeyFrame},理应这些时间段内的讲述内容也发生了变化,请你基于PPT变化时间点结合字幕描述的知识内容提炼出不同的切片。" + : $"授课中老师的PPT在这些时间段内进行了切换{taskInfo.PPTKeyFrame},理应这些时间段内的讲述内容也发生了变化,请你基于PPT变化时间点结合字幕描述的知识内容提炼出不同的切片。" +
$"每个阶段的起始和结束应接近这些时间点(例如,以时间点为中心,扩展至内容自然过渡处)。"; $"每个阶段的起始和结束应接近这些时间点(部分PPT时间段也可能不准确,请参考字幕内容)。";
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":阶段主题(string),"Content":内容总结(string)}]"""; var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":阶段主题(string),"Content":内容总结(string)}]""";
var reviewStr = taskInfo?.VideoType == AttachmentsInfoType. var reviewStr = taskInfo?.VideoType == AttachmentsInfoType.
? $"但本堂课是习题课,所以大部分阶段是不同的例题讲解内容。\n" ? $"本堂课是习题课,绝大部分阶段是不同的例题讲解内容。\n"
: string.Empty; : string.Empty;
var postMessages = string.Empty; var postMessages = string.Empty;
postMessages = postMessages =
$"请通过视频字幕内容分析出视频中课堂的授课知识点切片\n" + $"""
$"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。\n" +
$"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。\n" + {taskInfo.Subject}{sections}
reviewStr +
$"讲解知识内容的阶段的细分程度到某个知识点的讲解/认识/例题/总结\n" + 5////
$"不分析课堂作业相关的内容我已经预处理了\n" + {reviewStr}
$"初步划分阶段:{keyFrameStr}\n" + ///
$"Stage判断阶段类型如果内容以解题为主归类为“例题精讲”如果涉及新知识讲解归类为“新知讲解”以此类推。\n" +
$"Content简述单个阶段的核心讲解内容40~150字如“例题”“证明”“练习”“总结”..., 必须完全基于字幕文本可推断的信息,禁止捏造不存在的内容(硬性条件)。\n" + {keyFrameStr}
$"Theme理解Content提炼一个精确的主题例如“柯西不等式的基本应用”。\n" + Stage
$"输出要求:确保阶段划分合理、无重叠、\n" + Content40~150...,
$"作业布置阶段一般出现在末尾如果有" + ThemeContent西
$"输出格式要求内容只返回json格式({resFormat})\n" +
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。\n" + json格式({resFormat})
$"字幕列表 {captions.Captions} 字幕结束!"; (:|).
{captions.Captions} !
""";
await redisManager.AddTaskLog(taskInfo.Id, $"开始分析视频内容 {tryCount}"); await redisManager.AddTaskLog(taskInfo.Id, $"开始分析视频内容 {tryCount}");
var res = await geminiClient.ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), postMessages, "分析字幕"); var res = await geminiClient.ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), postMessages, "分析字幕", ChatGPTType.Gemini_3_Chat_thinking);
return res; return res;
} }
catch (Exception ex) catch (Exception ex)
@ -724,22 +764,29 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
await redisManager.AddTaskLog(taskInfo.Id, $"==>改进意见 {checkRes.Suggestion}"); await redisManager.AddTaskLog(taskInfo.Id, $"==>改进意见 {checkRes.Suggestion}");
await redisManager.AddTaskLog(taskInfo.Id, $"==>扣分原因 {checkRes.MinusScore}"); await redisManager.AddTaskLog(taskInfo.Id, $"==>扣分原因 {checkRes.MinusScore}");
// 质量复检 // 质量复检
if (checkRes != null) //if (checkRes != null)
{ //{
var improved = await ImproveSpanBySuggestion(questionRes, taskInfo, captions, sections, "扣分原因 {checkRes.MinusScore} \n 改进意见 {checkRes.Suggestion}"); // var improved = await ImproveSpanBySuggestion(questionRes, taskInfo, captions, sections, "扣分原因 {checkRes.MinusScore} \n 改进意见 {checkRes.Suggestion}");
if (improved != null) // var improvedCheck = await VerifySpanQuality(improved, taskInfo, captions, sections, Course_Id);
{ // await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后复检得分=>{improvedCheck.Score}");
var improvedCheck = await VerifySpanQuality(improved, taskInfo, captions, sections, Course_Id); // await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后扣分原因 {improvedCheck.MinusScore}");
await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后复检得分=>{improvedCheck.Score}"); // if (improved != null)
await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后扣分原因 {improvedCheck.MinusScore}"); // {
// if (improvedCheck != null && improvedCheck.Score >= 90 && improvedCheck.Score > checkRes.Score)
// {
// questionRes = improved;
// }
// else
// {
// await redisManager.AddTaskLog(taskInfo.Id, $"==>优化之后的得分降低/得分过低");
// continue;
// }
// }
//}
if (improvedCheck != null && improvedCheck.Score >= 90 && improvedCheck.Score > checkRes.Score) if (checkRes != null && checkRes.Score >= 90)
{ {
questionRes = improved; //写入知识点
if (homework != null && (!questionRes.Any(s => s.Stage == StageEnum..ToString())))
questionRes.Add(homework);
insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id); await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
await videoTaskStageDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id); await videoTaskStageDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
var tStage = insertData.GroupBy(s => s.StageId).Select(s => new VideoTaskStage var tStage = insertData.GroupBy(s => s.StageId).Select(s => new VideoTaskStage
@ -754,26 +801,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
Stage = s.First().Stage, Stage = s.First().Stage,
Theme = s.First().Theme, Theme = s.First().Theme,
VideoTaskId = taskInfo.Id, VideoTaskId = taskInfo.Id,
}).ToArray(); }).ToList();
//尝试追加 作业布置分段
if (homework != null && (!questionRes.Any(s => s.Stage == StageEnum..ToString())))
tStage.Add(homework.Adapt<VideoTaskStage>());
await videoTaskStageDB.InsertRangeAsync(tStage); await videoTaskStageDB.InsertRangeAsync(tStage);
await videoKonwPointDB.InsertRangeAsync(insertData); await videoKonwPointDB.InsertRangeAsync(insertData);
break; break;
} }
else
{
await redisManager.AddTaskLog(taskInfo.Id, $"==>优化之后的得分降低/得分过低");
continue;
}
}
}
if (checkRes != null && checkRes.Score >= 90)
{
//写入知识点
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
await videoKonwPointDB.InsertRangeAsync(insertData);
break;
}
else else
await redisManager.AddTaskLog(taskInfo.Id, $"==>课堂内容AI分析结果不合格!即将重试 剩余次数{tryCount}"); await redisManager.AddTaskLog(taskInfo.Id, $"==>课堂内容AI分析结果不合格!即将重试 剩余次数{tryCount}");
if (questionRes.Any(s => s.KeepTime < 30)) if (questionRes.Any(s => s.KeepTime < 30))

View File

@ -95,26 +95,21 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
#endif #endif
OR = new OfflineRecognizer(config); OR = new OfflineRecognizer(config);
//var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
//OfflineRecognizerConfig config1 = new OfflineRecognizerConfig();
//config1.FeatConfig.SampleRate = 16000;
//config1.FeatConfig.FeatureDim = 80;
//config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17"; //config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
OfflineRecognizerConfig config1 = new OfflineRecognizerConfig(); ////1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
config1.FeatConfig.SampleRate = 16000; //config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
config1.FeatConfig.FeatureDim = 80; //config1.ModelConfig.SenseVoice.Language = "zh";
config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt"); //config1.ModelConfig.ModelType = string.Empty;
config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx"); //config1.ModelConfig.NumThreads = numThreads;
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。 //config1.ModelConfig.Provider = "cpu";
config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1; //config1.DecodingMethod = "greedy_search";
config1.ModelConfig.SenseVoice.Language = "zh"; //config1.ModelConfig.Debug = 1;
config1.ModelConfig.ModelType = string.Empty; //OR1 = new OfflineRecognizer(config: config1);
config1.ModelConfig.NumThreads = numThreads;
config1.ModelConfig.Provider = "cpu";
config1.DecodingMethod = "greedy_search";
config1.ModelConfig.Debug = 1;
OR1 = new OfflineRecognizer(config: config1);
//OR1 = FunASRNano.OR; //OR1 = FunASRNano.OR;
} }
@ -143,7 +138,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
throw new Exception("task 音频路径未找到"); throw new Exception("task 音频路径未找到");
if (OR is null) Init(); if (OR is null) Init();
serviceProvider.GetRequiredService<SherpaVad>() serviceProvider.GetRequiredService<SherpaVad>()
.TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.silero_vad_v5); .TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324);
return Task.CompletedTask; return Task.CompletedTask;
} }

View File

@ -1,7 +1,23 @@
using Whisper.net; using System.Text.Json.Serialization;
using Whisper.net;
namespace VideoAnalysisCore.AICore.SherpaOnnx namespace VideoAnalysisCore.AICore.SherpaOnnx
{ {
/// <summary>
/// Compact caption entry made of a text and a start time.
/// It is serialized with the short JSON property names "r" and "t".
/// </summary>
public class SenseVoiceInput
{
    /// <summary>
    /// Caption text (JSON property "r"). Defaults to an empty string.
    /// </summary>
    [JsonPropertyName("r")]
    public string Text { get; set; } = "";

    /// <summary>
    /// Start time of the caption (JSON property "t").
    /// </summary>
    [JsonPropertyName("t")]
    public float Start { get; set; }
}
/// <summary> /// <summary>
/// 字幕识别 结果 /// 字幕识别 结果
/// </summary> /// </summary>

View File

@ -52,6 +52,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
static VadModelConfig VADModelConfig = default!; static VadModelConfig VADModelConfig = default!;
private readonly RedisManager redisManager; private readonly RedisManager redisManager;
private int WindowSize = 512;
private readonly IServiceProvider serviceProvider; private readonly IServiceProvider serviceProvider;
private readonly VoiceActivityDetector vad; private readonly VoiceActivityDetector vad;
private Func<int, float[], OfflineStream> Callback; private Func<int, float[], OfflineStream> Callback;
@ -63,14 +64,11 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
this.serviceProvider = serviceProvider; this.serviceProvider = serviceProvider;
VADModelConfig = new VadModelConfig(); VADModelConfig = new VadModelConfig();
VADModelConfig.SampleRate = 16000;
VADModelConfig.NumThreads = 1;
VADModelConfig.Provider = "cpu";
#if DEBUG #if DEBUG
VADModelConfig.Debug = 1; VADModelConfig.Debug = 1;
#endif #endif
VADModelConfig.SileroVad = new SileroVadModelConfig();
VADModelConfig.TenVad = new TenVadModelConfig();
} }
/// <summary> /// <summary>
@ -84,15 +82,36 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
{ {
VADModelConfig.NumThreads = numThreads; VADModelConfig.NumThreads = numThreads;
VADModelConfig.Provider = useGPU? "cuda" : "cpu"; VADModelConfig.Provider = useGPU? "cuda" : "cpu";
var path = Path.Combine(AppCommon.AIModelFile, "vad", SherpaVadVersion.silero_vad_v5); var path = Path.Combine(AppCommon.AIModelFile, "vad", vadVersion);
switch (vadVersion) switch (vadVersion)
{ {
case SherpaVadVersion.silero_vad_v4: case SherpaVadVersion.silero_vad_v4:
case SherpaVadVersion.silero_vad_v5: case SherpaVadVersion.silero_vad_v5:
VADModelConfig.SileroVad = new SileroVadModelConfig();
VADModelConfig.SileroVad.Model = path; VADModelConfig.SileroVad.Model = path;
//(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。
VADModelConfig.SileroVad.Threshold = 0.3f;
//(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?”
VADModelConfig.SileroVad.MinSilenceDuration = 0.2f;
// (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?”
VADModelConfig.SileroVad.MinSpeechDuration = 0.2f;
//(最大语音长度)秒
VADModelConfig.SileroVad.MaxSpeechDuration = 3.5f;
WindowSize = VADModelConfig.SileroVad.WindowSize;
break; break;
case SherpaVadVersion.ten_vad_324: case SherpaVadVersion.ten_vad_324:
VADModelConfig.TenVad = new TenVadModelConfig();
VADModelConfig.TenVad.Model = path; VADModelConfig.TenVad.Model = path;
//(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。
VADModelConfig.TenVad.Threshold = 0.3f;
//(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?”
VADModelConfig.TenVad.MinSilenceDuration = 0.2f;
// (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?”
VADModelConfig.TenVad.MinSpeechDuration = 0.2f;
//(最大语音长度)秒
VADModelConfig.TenVad.MaxSpeechDuration = 3.5f;
VADModelConfig.TenVad.WindowSize = 256;
WindowSize = VADModelConfig.TenVad.WindowSize;
break; break;
default: default:
break; break;
@ -118,41 +137,47 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
// 使用 Span 操作原始数据 // 使用 Span 操作原始数据
ReadOnlySpan<float> allSamples = reader.Samples.AsSpan(); ReadOnlySpan<float> allSamples = reader.Samples.AsSpan();
int numSamples = allSamples.Length; int numSamples = allSamples.Length;
int windowSize = VADModelConfig.SileroVad.WindowSize;
int sampleRate = VADModelConfig.SampleRate; int sampleRate = VADModelConfig.SampleRate;
int numIter = numSamples / windowSize; int numIter = numSamples / WindowSize;
var totalSecond = numSamples / (float)sampleRate; var totalSecond = numSamples / (float)sampleRate;
var res = new List<SenseVoiceRes>(500); var res = new List<SenseVoiceRes>(500);
VoiceActivityDetector vad;
using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 30); try
{
vad = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 20);
}
catch (Exception ex)
{
throw;
}
// 优化:复用缓冲区,避免在循环中重复分配内存 // 优化:复用缓冲区,避免在循环中重复分配内存
float[] buffer = new float[windowSize]; float[] buffer = new float[WindowSize];
for (int i = 0; i != numIter; ++i) for (int i = 0; i != numIter; ++i)
{ {
int start = i * windowSize; int start = i * WindowSize;
// 使用 Span 高效复制数据到固定缓冲区 // 使用 Span 高效复制数据到固定缓冲区
allSamples.Slice(start, windowSize).CopyTo(buffer); allSamples.Slice(start, WindowSize).CopyTo(buffer);
VAD.AcceptWaveform(buffer); vad.AcceptWaveform(buffer);
//是否检测到语音 //是否检测到语音
if (VAD.IsSpeechDetected()) if (vad.IsSpeechDetected())
{ {
//获取最新的发言片段 //获取最新的发言片段
while (!VAD.IsEmpty()) while (!vad.IsEmpty())
{ {
var p = ReadNext(VAD,res, totalSecond); var p = ReadNext(vad,res, totalSecond);
if (p != null) redisManager.SetTaskProgress(task, p + "%"); if (p != null) redisManager.SetTaskProgress(task, p + "%");
} }
} }
} }
VAD.Flush(); vad.Flush();
while (!VAD.IsEmpty()) while (!vad.IsEmpty())
{ {
var p = ReadNext(VAD, res, totalSecond); var p = ReadNext(vad, res, totalSecond);
if(p!= null) redisManager.SetTaskProgress(task, p + "%"); if(p!= null) redisManager.SetTaskProgress(task, p + "%");
} }
//如果携带任务ID //如果携带任务ID
@ -169,6 +194,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//分析完成视频字幕后继续接收任务 //分析完成视频字幕后继续接收任务
//redisManager.NewTask(); //redisManager.NewTask();
} }
vad.Dispose();
return res; return res;
} }
/// <summary> /// <summary>

View File

@ -237,6 +237,7 @@ namespace VideoAnalysisCore.Controllers.Dto
/// 知识点ID /// 知识点ID
/// </summary> /// </summary>
public string KnowPointId { get; set; } public string KnowPointId { get; set; }
public float KnowWeight { get; set; }
} }
public class TaskKnowBlock public class TaskKnowBlock

View File

@ -244,6 +244,7 @@ namespace VideoAnalysisCore.Controllers
Id = x.Id, Id = x.Id,
KnowPoint = x.KnowPoint, KnowPoint = x.KnowPoint,
KnowPointId = x.KnowPointId, KnowPointId = x.KnowPointId,
KnowWeight = x.KnowPointWeight??0f,
})?.ToArray() })?.ToArray()
: null : null
}).ToArray() }).ToArray()

View File

@ -8,6 +8,7 @@ using VideoAnalysisCore.AICore.SherpaOnnx;
using VideoAnalysisCore.Model.Enum; using VideoAnalysisCore.Model.Enum;
using VideoAnalysisCore.Model.Interface; using VideoAnalysisCore.Model.Interface;
using Whisper.net; using Whisper.net;
using Yitter.IdGenerator;
namespace VideoAnalysisCore.Model namespace VideoAnalysisCore.Model
{ {
@ -21,7 +22,7 @@ namespace VideoAnalysisCore.Model
/// id /// id
/// </summary> /// </summary>
[SugarColumn(IsPrimaryKey = true)] [SugarColumn(IsPrimaryKey = true)]
public long Id { get; set; } public long Id { get; set; } = YitIdHelper.NextId();
/// <summary> /// <summary>
/// 视频任务id /// 视频任务id
/// <see cref="VideoTask.Id"/> /// <see cref="VideoTask.Id"/>