diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs b/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs index 5c97cf1..a0aec45 100644 --- a/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs +++ b/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs @@ -36,7 +36,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx /// public class FunASRNano { - static OfflineRecognizer OR = default!; + public static OfflineRecognizer OR = default!; private readonly IServiceProvider serviceProvider; public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider) @@ -70,7 +70,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B"); //提示词 config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant."; - config.ModelConfig.FunAsrNano.UserPrompt = "这是一趟中国的课堂视频音频,请你帮我分析出它讲述的内容!"; + config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!"; config.ModelConfig.FunAsrNano.MaxNewTokens = 512; config.ModelConfig.FunAsrNano.Temperature = 1E-06f; config.ModelConfig.FunAsrNano.TopP = 0.8f; @@ -128,7 +128,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx var stream = OR.CreateStream(); stream.AcceptWaveform(sampleRate, samples); OR.Decode(stream); - return stream; + return stream; } } } diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs index 50663fa..163a25a 100644 --- a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs +++ b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs @@ -1,4 +1,5 @@ -using Microsoft.Extensions.DependencyInjection; +using Dm.util; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Options; using SherpaOnnx; using SqlSugar.IOC; @@ -31,10 +32,13 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx } public class SenseVoice { - static OfflineRecognizer OR = default!; - + public static OfflineRecognizer OR = default!; private readonly IServiceProvider serviceProvider; + public static OfflineRecognizer OR1 = default!; + //测试用 + public static List<(string z1,string z2)> cachedValue = new List<(string z1, string z2)>(); + public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider) { @@ -90,6 +94,29 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx config.ModelConfig.Debug = 1; #endif OR = new OfflineRecognizer(config); + + + + + + + var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17"; + OfflineRecognizerConfig config1 = new OfflineRecognizerConfig(); + config1.FeatConfig.SampleRate = 16000; + config1.FeatConfig.FeatureDim = 80; + config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt"); + config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx"); + //1 使用逆文本规范化处理感官语音 [控制标点符号生成]。 + config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1; + config1.ModelConfig.SenseVoice.Language = "zh"; + config1.ModelConfig.ModelType = string.Empty; + config1.ModelConfig.NumThreads = numThreads; + config1.ModelConfig.Provider = "cpu"; + config1.DecodingMethod = "greedy_search"; + config1.ModelConfig.Debug = 1; + OR1 = new OfflineRecognizer(config: config1); + //OR1 = FunASRNano.OR; + } /// @@ -99,7 +126,6 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx /// public List RunTask(Stream s) { - if (s is null) throw new Exception("音频路径 is null"); if (OR is null) Init(); return serviceProvider.GetRequiredService() @@ -121,21 +147,21 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx return Task.CompletedTask; } + + /// /// 获取语音字幕 /// /// 采样率 /// 采样值(样品) /// 结果流 - public OfflineStream SoundHandle(int sampleRate, float[] samples) + public OfflineStream SoundHandle(int sampleRate, float[] samples) { var stream = OR.CreateStream(); stream.AcceptWaveform(sampleRate, samples); OR.Decode(stream); return stream; } - - } } diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs index 7175942..3e1d983 100644 --- a/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs +++ b/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs @@ -158,7 +158,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx //如果携带任务ID if (!string.IsNullOrEmpty(task)) { - _ = redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count); + _ = redisManager.AddTaskLog(task, "==>字幕数量" + res.Count); var captionsStr = res.ToJson(); _ = serviceProvider.GetRequiredService>() .AsUpdateable() diff --git a/VideoAnalysisCore/Controllers/VideoTaskController.cs b/VideoAnalysisCore/Controllers/VideoTaskController.cs index 12b37b7..614c2e5 100644 --- a/VideoAnalysisCore/Controllers/VideoTaskController.cs +++ b/VideoAnalysisCore/Controllers/VideoTaskController.cs @@ -4,6 +4,7 @@ using MapsterMapper; using Microsoft.AspNetCore.Authorization; using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.DependencyInjection; using SqlSugar; using System; using System.Diagnostics; @@ -149,21 +150,32 @@ namespace VideoAnalysisCore.Controllers public IActionResult AudioRecognition(IFormFile file) { using var s = file.OpenReadStream(); - var res = senseVoice.RunTask(s); - s.Position = 0; - var res1 = funASRNano.RunTask(s); + senseVoice.RunTask(s); + return Ok(); + } + /// + /// 语音识别 + /// + /// 文件流 + /// + [HttpPost(Name = "AudioRecognition_test")] + public IActionResult AudioRecognition_test(IFormFile file) + { + using var s = file.OpenReadStream(); - for (int i = 0; i < res.Count(); i++) + var x = AppCommon.Services.GetService(); + x.Init(); + senseVoice.RunTask(s); + for (int i = 0; i < SenseVoice.cachedValue.Count(); i++) { - Console.WriteLine($"第{res[i].Start}秒"); - Console.WriteLine($"ssv=> {res[i].Text}"); - Console.WriteLine($"fun=> {res1[i].Text}"); + Console.WriteLine($"字幕索引=>{i}"); + Console.WriteLine($"ssv=>{SenseVoice.cachedValue[i].z1}"); + Console.WriteLine($"fun=>{SenseVoice.cachedValue[i].z2}"); Console.WriteLine(); } - return Ok(res); + return Ok(); } - /// /// 获取FTS_Data str ///