diff --git a/VideoAnalysis/Expand/CoravelExpand.cs b/VideoAnalysis/Expand/CoravelExpand.cs index 01ce3de..9c8f855 100644 --- a/VideoAnalysis/Expand/CoravelExpand.cs +++ b/VideoAnalysis/Expand/CoravelExpand.cs @@ -18,7 +18,9 @@ namespace Learn.VideoAnalysis.Expand Console.WriteLine($"{DateTime.Now}=>初始化 Coravel"); service.AddScheduler(); +#if !DEBUG service.AddTransient(); +#endif service.AddTransient(); } public static void UseCoravelExpand(this IApplicationBuilder provider) diff --git a/VideoAnalysis/Program.cs b/VideoAnalysis/Program.cs index 81a8022..a90ca2d 100644 --- a/VideoAnalysis/Program.cs +++ b/VideoAnalysis/Program.cs @@ -100,7 +100,8 @@ namespace Learn.VideoAnalysis AppCommon.Services = app.Services; app.UseMiddleware("Swagger"); // Configure the HTTP request pipeline. - _ = app.Services.GetRequiredService(); + //redisз + //_ = app.Services.GetRequiredService(); app.UseSwagger(); app.UseSwaggerUI(); app.UseExceptionHandler("/Error"); diff --git a/VideoAnalysis/WebUI/src/views/welcome/showTask.vue b/VideoAnalysis/WebUI/src/views/welcome/showTask.vue index f66f9f6..e6a1ce6 100644 --- a/VideoAnalysis/WebUI/src/views/welcome/showTask.vue +++ b/VideoAnalysis/WebUI/src/views/welcome/showTask.vue @@ -156,8 +156,8 @@ function timeupdateVideo() { (subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end ); // 更新字幕 AI优化字幕 - let subtitleI1 = subtitles1.value.findIndex( - (subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end + let subtitleI1 = subtitles1.value.findLastIndex( + (subtitle) => currentTime >= subtitle.start ); if (subtitleI > -1 && currentSubtitle.value !== subtitles.value[subtitleI].text) { currentSubtitle.value = subtitles.value[subtitleI].text; diff --git a/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs b/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs index c6f8bf9..a58af6e 100644 --- a/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs +++ b/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs @@ -1,4 +1,4 @@ -using Newtonsoft.Json; +using Newtonsoft.Json; using Newtonsoft.Json.Linq; using System; using System.Collections.Generic; @@ -34,9 +34,10 @@ namespace VideoAnalysisCore.AICore.GPT.Dto } public class VideoKnowPointDto { - public float KnowPointWeight { get; set; } public string KnowPoint { get; set; } public string KnowPointId { get; set; } + public float KnowSourceTime { get; set; } + public float KnowPointWeight { get; set; } public string KnowSource { get; set; } } public class VideoKnowRes @@ -65,10 +66,6 @@ namespace VideoAnalysisCore.AICore.GPT.Dto /// public virtual string? KnowPoint { get; set; } /// - /// 知识点权重 - /// - public virtual float? KnowPointWeight { get; set; } - /// /// 知识点ID /// public virtual string? KnowPointId { get; set; } diff --git a/VideoAnalysisCore/AICore/GPT/GTP_Analysis_1.cs b/VideoAnalysisCore/AICore/GPT/GTP_Analysis_1.cs index 0e38102..a4c8e87 100644 --- a/VideoAnalysisCore/AICore/GPT/GTP_Analysis_1.cs +++ b/VideoAnalysisCore/AICore/GPT/GTP_Analysis_1.cs @@ -31,7 +31,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek /// /// 视频分析工作流1 /// - public class GTP_Analysis_1 : IBserGPTWorkflow + public class GTP_Analysis_1 : IBserGPTWorkflow { private readonly GeminiGPTClient geminiClient; private readonly DeepSeekGPTClient deepSeekClient; @@ -326,7 +326,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek var postMessages = $$""" # Role - 你是一位{{subject}}学科的教育专家与资深校对。你的任务是将{{sections}}内容的原始语音识别(STT)JSON 数据清洗为高质量教学文本。 + 你是一位{{subject}}学科的教育专家,有着资深字幕校对经验。你的任务是将{{sections}}内容的原始语音识别(STT)JSON 数据清洗为高质量教学文本。 # Input & Output Protocol 输入和输出均为严格的 JSON 数组格式:`[{"t": number, "r": string}]`。 `t` (Time): 绝对锚点,代表时间戳。严禁修改、严禁排序、严禁删除。 @@ -434,7 +434,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek } return null; } - + /// + /// 作业内容检查 + /// + /// + /// + /// + /// private async Task DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections) { if (captions is null || string.IsNullOrWhiteSpace(captions.Captions)) diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs b/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs index a0aec45..72470aa 100644 --- a/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs +++ b/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs @@ -11,6 +11,7 @@ using System.Text; using System.Text.Json; using System.Text.RegularExpressions; using System.Threading.Tasks; +using UserCenter.Model.Enum; using VideoAnalysisCore.Common; using VideoAnalysisCore.Model; using VideoAnalysisCore.Model.Enum; @@ -49,7 +50,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx /// /// 默认6线程 /// 是否使用gpu 报错请看安装CUDA环境 - public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false) + public void Init(SubjectEnum? subject = null, int numThreads = 10, bool useGPU = false, bool useHotwords = false) { Console.WriteLine("初始化 FunASRNano"); OfflineRecognizerConfig config = new OfflineRecognizerConfig(); @@ -63,17 +64,23 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx //将非结构化数据(文本、图像、音频等)转换为低维稠密向量 config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx"); //接入的大语言模型 - config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx"); + //config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder ,"llm.fp16.onnx"); + config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "int8-2025-12-30", "llm.int8.onnx"); //插入预训练模型(如Transformer)的小型可训练模块 (如语音识别、情感分析) config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx"); //分词器 config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B"); //提示词 config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant."; - config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!"; + //加上学科为空的处理 + if (subject != null) + config.ModelConfig.FunAsrNano.UserPrompt = $"这是一堂中国{subject}的课堂视频音频,请你帮我分析出它讲述的内容!"; + else + config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国课堂的视频音频,请你帮我分析出它讲述的内容!"; config.ModelConfig.FunAsrNano.MaxNewTokens = 512; config.ModelConfig.FunAsrNano.Temperature = 1E-06f; - config.ModelConfig.FunAsrNano.TopP = 0.8f; + config.ModelConfig.FunAsrNano.TopP = 0.7f; + //种子 config.ModelConfig.FunAsrNano.Seed = 42; //模型类型 @@ -81,10 +88,10 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx config.ModelConfig.NumThreads = numThreads; config.ModelConfig.Provider = "cpu"; //需要使用GPU - if (!useGPU) + if (useGPU) config.ModelConfig.Provider = "cuda"; #if DEBUG - config.ModelConfig.Debug = 1; + //config.ModelConfig.Debug = 1; #endif OR = new OfflineRecognizer(config); } @@ -106,15 +113,17 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx /// /// /// - public Task RunTask(string task) + public Task RunTask(string task) { + var taskInfo = serviceProvider.GetRequiredService>().GetById(task); + if(taskInfo is null) + throw new Exception("task 未找到"); var filePath = Path.Combine(task.LocalPath(), "task.wav"); if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath)) throw new Exception("task 音频路径未找到"); - if (OR is null) Init(); + if (OR is null) Init(taskInfo.Subject); serviceProvider.GetRequiredService() - .TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.ten_vad_324); - + .TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324); return Task.CompletedTask; } /// diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs index 25aae3b..675e9ac 100644 --- a/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs +++ b/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs @@ -90,7 +90,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx VADModelConfig.SileroVad = new SileroVadModelConfig(); VADModelConfig.SileroVad.Model = path; //(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。 - VADModelConfig.SileroVad.Threshold = 0.3f; + VADModelConfig.SileroVad.Threshold = 0.25f; //(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?” VADModelConfig.SileroVad.MinSilenceDuration = 0.2f; // (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?” diff --git a/VideoAnalysisCore/Common/RedisExpand.cs b/VideoAnalysisCore/Common/RedisExpand.cs index 3247fc4..ea93c88 100644 --- a/VideoAnalysisCore/Common/RedisExpand.cs +++ b/VideoAnalysisCore/Common/RedisExpand.cs @@ -110,12 +110,14 @@ namespace VideoAnalysisCore.Common public FFMPGEHandle FFMPGE { get; set; } public SenseVoice senseVoice { get; set; } + public FunASRNano funASRNano { get; set; } public RedisManager redisManager { get; set; } - public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager) + public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager, FunASRNano funASRNano) { FFMPGE = fFMPGE; this.senseVoice = senseVoice; + this.funASRNano = funASRNano; this.redisManager = redisManager; Init(); redisManager.InitChannel(); @@ -137,7 +139,8 @@ namespace VideoAnalysisCore.Common await scope.ServiceProvider.GetService()?.RunTask(task); }); SubscribeList.Add(RedisChannelEnum.分离音频, FFMPGE.RunAsync); - SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask); + //SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask); + SubscribeList.Add(RedisChannelEnum.解析字幕, funASRNano.RunTask); //SubscribeList.Add(RedisChannelEnum.解析说话人,Speaker.Run); SubscribeList.Add(RedisChannelEnum.AI课程类型, async (task) => { diff --git a/VideoAnalysisCore/Controllers/Dto/ApiDto.cs b/VideoAnalysisCore/Controllers/Dto/ApiDto.cs index fb29636..e874697 100644 --- a/VideoAnalysisCore/Controllers/Dto/ApiDto.cs +++ b/VideoAnalysisCore/Controllers/Dto/ApiDto.cs @@ -128,6 +128,14 @@ namespace VideoAnalysisCore.Controllers.Dto /// 用户中心的云校id /// public long? UserCenterCloudSchoolId { get; set; } + /// + /// 教材层次 + /// + public CourselevelTypeEnum? CourseLevel { get; set; } + /// + /// 年级 + /// + public GradeEnum? GradeId { get; set; } /// /// 教育阶段 diff --git a/VideoAnalysisCore/Controllers/VideoTaskController.cs b/VideoAnalysisCore/Controllers/VideoTaskController.cs index ce4b059..6d8ccef 100644 --- a/VideoAnalysisCore/Controllers/VideoTaskController.cs +++ b/VideoAnalysisCore/Controllers/VideoTaskController.cs @@ -153,8 +153,8 @@ namespace VideoAnalysisCore.Controllers public IActionResult AudioRecognition(IFormFile file) { using var s = file.OpenReadStream(); - senseVoice.RunTask(s); - return Ok(); + var res = senseVoice.RunTask(s); + return Ok(res); } /// /// 语音识别 diff --git a/VideoAnalysisCore/Model/Enum/CourselevelType.cs b/VideoAnalysisCore/Model/Enum/CourselevelType.cs new file mode 100644 index 0000000..ab57330 --- /dev/null +++ b/VideoAnalysisCore/Model/Enum/CourselevelType.cs @@ -0,0 +1,18 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace VideoAnalysisCore.Model.Enum +{ + /// + /// 课程层次 + /// + public enum CourselevelTypeEnum + { + 一层次 = 1, + 二层次 = 2, + 三层次 = 3 + } +} diff --git a/VideoAnalysisCore/VideoAnalysisCore.csproj b/VideoAnalysisCore/VideoAnalysisCore.csproj index ccfab38..6b3ce02 100644 --- a/VideoAnalysisCore/VideoAnalysisCore.csproj +++ b/VideoAnalysisCore/VideoAnalysisCore.csproj @@ -71,7 +71,7 @@ - +