调试 FunASR的STT,修复流程上的bug

This commit is contained in:
小肥羊 2026-01-30 17:23:40 +08:00
parent eed63794b8
commit d948f854fb
12 changed files with 72 additions and 28 deletions

View File

@ -18,7 +18,9 @@ namespace Learn.VideoAnalysis.Expand
Console.WriteLine($"{DateTime.Now}=>初始化 Coravel"); Console.WriteLine($"{DateTime.Now}=>初始化 Coravel");
service.AddScheduler(); service.AddScheduler();
#if !DEBUG
service.AddTransient<TaskFileClearJob>(); service.AddTransient<TaskFileClearJob>();
#endif
service.AddTransient<NodePackageJob>(); service.AddTransient<NodePackageJob>();
} }
public static void UseCoravelExpand(this IApplicationBuilder provider) public static void UseCoravelExpand(this IApplicationBuilder provider)

View File

@ -100,7 +100,8 @@ namespace Learn.VideoAnalysis
AppCommon.Services = app.Services; AppCommon.Services = app.Services;
app.UseMiddleware<BasicAuthMiddleware>("Swagger"); app.UseMiddleware<BasicAuthMiddleware>("Swagger");
// Configure the HTTP request pipeline. // Configure the HTTP request pipeline.
_ = app.Services.GetRequiredService<RedisInit>(); //开启redis队列服务
//_ = app.Services.GetRequiredService<RedisInit>();
app.UseSwagger(); app.UseSwagger();
app.UseSwaggerUI(); app.UseSwaggerUI();
app.UseExceptionHandler("/Error"); app.UseExceptionHandler("/Error");

View File

@ -156,8 +156,8 @@ function timeupdateVideo() {
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end (subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end
); );
// AI // AI
let subtitleI1 = subtitles1.value.findIndex( let subtitleI1 = subtitles1.value.findLastIndex(
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end (subtitle) => currentTime >= subtitle.start
); );
if (subtitleI > -1 && currentSubtitle.value !== subtitles.value[subtitleI].text) { if (subtitleI > -1 && currentSubtitle.value !== subtitles.value[subtitleI].text) {
currentSubtitle.value = subtitles.value[subtitleI].text; currentSubtitle.value = subtitles.value[subtitleI].text;

View File

@ -1,4 +1,4 @@
using Newtonsoft.Json; using Newtonsoft.Json;
using Newtonsoft.Json.Linq; using Newtonsoft.Json.Linq;
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
@ -34,9 +34,10 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
} }
public class VideoKnowPointDto public class VideoKnowPointDto
{ {
public float KnowPointWeight { get; set; }
public string KnowPoint { get; set; } public string KnowPoint { get; set; }
public string KnowPointId { get; set; } public string KnowPointId { get; set; }
public float KnowSourceTime { get; set; }
public float KnowPointWeight { get; set; }
public string KnowSource { get; set; } public string KnowSource { get; set; }
} }
public class VideoKnowRes public class VideoKnowRes
@ -65,10 +66,6 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
/// </summary> /// </summary>
public virtual string? KnowPoint { get; set; } public virtual string? KnowPoint { get; set; }
/// <summary> /// <summary>
/// 知识点权重
/// </summary>
public virtual float? KnowPointWeight { get; set; }
/// <summary>
/// 知识点ID /// 知识点ID
/// </summary> /// </summary>
public virtual string? KnowPointId { get; set; } public virtual string? KnowPointId { get; set; }

View File

@ -31,7 +31,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
/// <summary> /// <summary>
/// 视频分析工作流1 /// 视频分析工作流1
/// </summary> /// </summary>
public class GTP_Analysis_1 : IBserGPTWorkflow public class GTP_Analysis_1 : IBserGPTWorkflow
{ {
private readonly GeminiGPTClient geminiClient; private readonly GeminiGPTClient geminiClient;
private readonly DeepSeekGPTClient deepSeekClient; private readonly DeepSeekGPTClient deepSeekClient;
@ -326,7 +326,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var postMessages = var postMessages =
$$""" $$"""
# Role # Role
{{subject}}{{sections}}STTJSON {{subject}},{{sections}}STTJSON
# Input & Output Protocol # Input & Output Protocol
JSON `[{"t": number, "r": string}]` JSON `[{"t": number, "r": string}]`
`t` (Time): `t` (Time):
@ -434,7 +434,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
} }
return null; return null;
} }
/// <summary>
/// 作业内容检查
/// </summary>
/// <param name="taskInfo"></param>
/// <param name="captions"></param>
/// <param name="sections"></param>
/// <returns></returns>
private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections) private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections)
{ {
if (captions is null || string.IsNullOrWhiteSpace(captions.Captions)) if (captions is null || string.IsNullOrWhiteSpace(captions.Captions))

View File

@ -11,6 +11,7 @@ using System.Text;
using System.Text.Json; using System.Text.Json;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
using System.Threading.Tasks; using System.Threading.Tasks;
using UserCenter.Model.Enum;
using VideoAnalysisCore.Common; using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model; using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum; using VideoAnalysisCore.Model.Enum;
@ -49,7 +50,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// </summary> /// </summary>
/// <param name="numThreads">默认6线程</param> /// <param name="numThreads">默认6线程</param>
/// <param name="useGPU">是否使用gpu 报错请看安装CUDA环境 <see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/></param> /// <param name="useGPU">是否使用gpu 报错请看安装CUDA环境 <see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/></param>
public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false) public void Init(SubjectEnum? subject = null, int numThreads = 10, bool useGPU = false, bool useHotwords = false)
{ {
Console.WriteLine("初始化 FunASRNano"); Console.WriteLine("初始化 FunASRNano");
OfflineRecognizerConfig config = new OfflineRecognizerConfig(); OfflineRecognizerConfig config = new OfflineRecognizerConfig();
@ -63,17 +64,23 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//将非结构化数据(文本、图像、音频等)转换为低维稠密向量 //将非结构化数据(文本、图像、音频等)转换为低维稠密向量
config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx"); config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
//接入的大语言模型 //接入的大语言模型
config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx"); //config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder ,"llm.fp16.onnx");
config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "int8-2025-12-30", "llm.int8.onnx");
//插入预训练模型如Transformer的小型可训练模块 (如语音识别、情感分析) //插入预训练模型如Transformer的小型可训练模块 (如语音识别、情感分析)
config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx"); config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
//分词器 //分词器
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B"); config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
//提示词 //提示词
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant."; config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!"; //加上学科为空的处理
if (subject != null)
config.ModelConfig.FunAsrNano.UserPrompt = $"这是一堂中国{subject}的课堂视频音频,请你帮我分析出它讲述的内容!";
else
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国课堂的视频音频,请你帮我分析出它讲述的内容!";
config.ModelConfig.FunAsrNano.MaxNewTokens = 512; config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
config.ModelConfig.FunAsrNano.Temperature = 1E-06f; config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
config.ModelConfig.FunAsrNano.TopP = 0.8f; config.ModelConfig.FunAsrNano.TopP = 0.7f;
//种子
config.ModelConfig.FunAsrNano.Seed = 42; config.ModelConfig.FunAsrNano.Seed = 42;
//模型类型 //模型类型
@ -81,10 +88,10 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.ModelConfig.NumThreads = numThreads; config.ModelConfig.NumThreads = numThreads;
config.ModelConfig.Provider = "cpu"; config.ModelConfig.Provider = "cpu";
//需要使用GPU //需要使用GPU
if (!useGPU) if (useGPU)
config.ModelConfig.Provider = "cuda"; config.ModelConfig.Provider = "cuda";
#if DEBUG #if DEBUG
config.ModelConfig.Debug = 1; //config.ModelConfig.Debug = 1;
#endif #endif
OR = new OfflineRecognizer(config); OR = new OfflineRecognizer(config);
} }
@ -106,15 +113,17 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// </summary> /// </summary>
/// <param name="task"></param> /// <param name="task"></param>
/// <returns></returns> /// <returns></returns>
public Task RunTask(string task) public Task RunTask(string task)
{ {
var taskInfo = serviceProvider.GetRequiredService<Repository<VideoTask>>().GetById(task);
if(taskInfo is null)
throw new Exception("task 未找到");
var filePath = Path.Combine(task.LocalPath(), "task.wav"); var filePath = Path.Combine(task.LocalPath(), "task.wav");
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath)) if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
throw new Exception("task 音频路径未找到"); throw new Exception("task 音频路径未找到");
if (OR is null) Init(); if (OR is null) Init(taskInfo.Subject);
serviceProvider.GetRequiredService<SherpaVad>() serviceProvider.GetRequiredService<SherpaVad>()
.TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.ten_vad_324); .TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324);
return Task.CompletedTask; return Task.CompletedTask;
} }
/// <summary> /// <summary>

View File

@ -90,7 +90,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
VADModelConfig.SileroVad = new SileroVadModelConfig(); VADModelConfig.SileroVad = new SileroVadModelConfig();
VADModelConfig.SileroVad.Model = path; VADModelConfig.SileroVad.Model = path;
//(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。 //(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。
VADModelConfig.SileroVad.Threshold = 0.3f; VADModelConfig.SileroVad.Threshold = 0.25f;
//(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?” //(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?”
VADModelConfig.SileroVad.MinSilenceDuration = 0.2f; VADModelConfig.SileroVad.MinSilenceDuration = 0.2f;
// (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?” // (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?”

View File

@ -110,12 +110,14 @@ namespace VideoAnalysisCore.Common
public FFMPGEHandle FFMPGE { get; set; } public FFMPGEHandle FFMPGE { get; set; }
public SenseVoice senseVoice { get; set; } public SenseVoice senseVoice { get; set; }
public FunASRNano funASRNano { get; set; }
public RedisManager redisManager { get; set; } public RedisManager redisManager { get; set; }
public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager) public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager, FunASRNano funASRNano)
{ {
FFMPGE = fFMPGE; FFMPGE = fFMPGE;
this.senseVoice = senseVoice; this.senseVoice = senseVoice;
this.funASRNano = funASRNano;
this.redisManager = redisManager; this.redisManager = redisManager;
Init(); Init();
redisManager.InitChannel(); redisManager.InitChannel();
@ -137,7 +139,8 @@ namespace VideoAnalysisCore.Common
await scope.ServiceProvider.GetService<DownloadFile>()?.RunTask(task); await scope.ServiceProvider.GetService<DownloadFile>()?.RunTask(task);
}); });
SubscribeList.Add(RedisChannelEnum., FFMPGE.RunAsync); SubscribeList.Add(RedisChannelEnum., FFMPGE.RunAsync);
SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask); //SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask);
SubscribeList.Add(RedisChannelEnum.解析字幕, funASRNano.RunTask);
//SubscribeList.Add(RedisChannelEnum.解析说话人,Speaker.Run); //SubscribeList.Add(RedisChannelEnum.解析说话人,Speaker.Run);
SubscribeList.Add(RedisChannelEnum.AI课程类型, async (task) => SubscribeList.Add(RedisChannelEnum.AI课程类型, async (task) =>
{ {

View File

@ -128,6 +128,14 @@ namespace VideoAnalysisCore.Controllers.Dto
/// 用户中心的云校id /// 用户中心的云校id
/// </summary> /// </summary>
public long? UserCenterCloudSchoolId { get; set; } public long? UserCenterCloudSchoolId { get; set; }
/// <summary>
/// 教材层次
/// </summary>
public CourselevelTypeEnum? CourseLevel { get; set; }
/// <summary>
/// 年级
/// </summary>
public GradeEnum? GradeId { get; set; }
/// <summary> /// <summary>
/// 教育阶段 /// 教育阶段

View File

@ -153,8 +153,8 @@ namespace VideoAnalysisCore.Controllers
public IActionResult AudioRecognition(IFormFile file) public IActionResult AudioRecognition(IFormFile file)
{ {
using var s = file.OpenReadStream(); using var s = file.OpenReadStream();
senseVoice.RunTask(s); var res = senseVoice.RunTask(s);
return Ok(); return Ok(res);
} }
/// <summary> /// <summary>
/// 语音识别 /// 语音识别

View File

@ -0,0 +1,18 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace VideoAnalysisCore.Model.Enum
{
/// <summary>
/// 课程层次
/// </summary>
public enum CourselevelTypeEnum
{
= 1,
= 2,
= 3
}
}

View File

@ -71,7 +71,7 @@
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" /> <PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" /> <PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" /> <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.21" /> <PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.22" />
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" /> <PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" /> <PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
<PackageReference Include="SqlSugarCore" Version="5.1.4.205" /> <PackageReference Include="SqlSugarCore" Version="5.1.4.205" />