using Dm.util;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar.IOC;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;

namespace VideoAnalysisCore.AICore.SherpaOnnx
{
    /// <summary>
    /// Dependency-injection registration helpers for the SenseVoice speech-to-text service.
    /// </summary>
    public static class SenseVoiceExpand
    {
        /// <summary>
        /// Adds SenseVoice speech-to-text to the service collection as a singleton.
        /// </summary>
        /// <param name="services">The service collection to register into.</param>
        public static void AddSenseVoiceExpand(this IServiceCollection services)
        {
            // NOTE(review): the generic type argument was not visible in the reviewed source;
            // SenseVoice is assumed because it is the only service class in this file - confirm.
            services.AddSingleton<SenseVoice>();
        }
    }

    /// <summary>
    /// Wraps a sherpa-onnx <see cref="OfflineRecognizer"/> configured with a SenseVoice model
    /// and exposes entry points that turn audio (a stream or a task's wav file) into text.
    /// </summary>
    public class SenseVoice
    {
        /// <summary>
        /// Shared recognizer instance. Created by <see cref="Init"/>; the RunTask overloads
        /// call <see cref="Init"/> with default arguments when it is still null.
        /// </summary>
        public static OfflineRecognizer OR = default!;

        private readonly IServiceProvider serviceProvider;

        /// <summary>
        /// Second recognizer slot. Nothing in this file assigns it.
        /// </summary>
        public static OfflineRecognizer OR1 = default!;

        // NOTE(review): the original comment at this position reads "for testing"; it is unclear
        // whether it refers to OR1 above or to cachedValue below.
        public static List<(string z1, string z2)> cachedValue = new List<(string z1, string z2)>();

        /// <summary>
        /// Creates the service.
        /// </summary>
        /// <param name="redisManager">Injected but not used by this class.</param>
        /// <param name="serviceProvider">Provider used to resolve the handler that runs recognition tasks.</param>
        public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
        {
            this.serviceProvider = serviceProvider;
        }

        /// <summary>
        /// Initializes SenseVoice: builds the recognizer configuration and assigns <see cref="OR"/>.
        /// </summary>
        /// <param name="numThreads">Number of threads passed to the model configuration. Defaults to 6.</param>
        /// <param name="useGPU">
        /// When true the "cuda" execution provider is selected, otherwise "cpu".
        /// If enabling it fails, check that the CUDA environment is installed.
        /// </param>
        /// <param name="useHotwords">Not read by this method.</param>
        public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
        {
            Console.WriteLine("初始化 SenseVoice");

            // Directory (below AppCommon.AIModelFile) holding tokens.txt and model.onnx.
            var modelDirectory = "sherpa-onnx-sense-voice-24-07-17";

            OfflineRecognizerConfig config = new OfflineRecognizerConfig();

            // Sample rate.
            config.FeatConfig.SampleRate = 16000;
            // Feature dimension the model was trained with.
            config.FeatConfig.FeatureDim = 80;

            // Path to tokens.txt
            config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, modelDirectory, "tokens.txt");
            // SenseVoice model file.
            config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, modelDirectory, "model.onnx");
            // 1 = apply inverse text normalization (controls punctuation generation).
            config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
            // An inverse-text-normalization rule fst (config.RuleFsts, "itn_subject_sx.fst")
            // was previously wired in here and is currently disabled.
            config.ModelConfig.SenseVoice.Language = "zh";

            // Model type (left empty).
            config.ModelConfig.ModelType = string.Empty;
            config.ModelConfig.NumThreads = numThreads;

            // Fix: the original condition was inverted (`if (!useGPU) Provider = "cuda"`),
            // which selected CUDA exactly when the caller asked for CPU and vice versa.
            config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";

            // Valid decoding methods: "greedy_search" and "modified_beam_search".
            // With "modified_beam_search", config.MaxActivePaths sets how many active
            // paths are kept during the search; it is not used with greedy search.
            config.DecodingMethod = "greedy_search";

#if DEBUG
            config.ModelConfig.Debug = 1;
#endif

            OR = new OfflineRecognizer(config);

            // OR1 is intentionally left unassigned. An earlier experiment built it the same way
            // from the "sherpa-onnx-sense-voice-funasr-nano-2025-12-17" model directory
            // (or reused FunASRNano.OR); that code is disabled.
        }

        /// <summary>
        /// Gets the speech subtitles for audio supplied as a stream.
        /// Uses the silero_vad_v5 VAD version and passes no task identifier to the handler.
        /// </summary>
        /// <param name="s">Wave audio stream.</param>
        /// <returns>Whatever the resolved handler's TaskHandle returns.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        // NOTE(review): the element type of the returned List and the type argument of
        // GetRequiredService were not visible in the reviewed source; both are reproduced
        // exactly as shown and must be restored from the real file.
        public List RunTask(Stream s)
        {
            if (s is null)
            {
                throw new ArgumentNullException(nameof(s), "音频路径 is null");
            }

            if (OR is null)
            {
                Init();
            }

            return serviceProvider.GetRequiredService()
                .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
        }

        /// <summary>
        /// Gets the speech subtitles for a task whose audio is stored as "task.wav" inside the
        /// task's local directory. Uses the ten_vad_324 VAD version. The work runs synchronously;
        /// the handler's return value is discarded and a completed task is returned.
        /// </summary>
        /// <param name="task">Task identifier; <c>task.LocalPath()</c> yields its local directory.</param>
        /// <returns>An already-completed <see cref="Task"/>.</returns>
        /// <exception cref="FileNotFoundException">The task's wav file does not exist.</exception>
        // NOTE(review): the type argument of GetRequiredService was not visible in the reviewed
        // source; it is reproduced exactly as shown and must be restored from the real file.
        public Task RunTask(string task)
        {
            var filePath = Path.Combine(task.LocalPath(), "task.wav");

            // Path.Combine with a non-empty file name never yields an empty string, so the
            // existence check alone covers the original null/empty test.
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("task 音频路径未找到", filePath);
            }

            if (OR is null)
            {
                Init();
            }

            serviceProvider.GetRequiredService()
                .TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324);

            return Task.CompletedTask;
        }

        /// <summary>
        /// Decodes one block of audio samples with the shared recognizer <see cref="OR"/>.
        /// This method does not call <see cref="Init"/>; <see cref="OR"/> must already be set.
        /// </summary>
        /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
        /// <param name="samples">Audio sample values.</param>
        /// <returns>The decoded stream; it is not disposed here.</returns>
        public OfflineStream SoundHandle(int sampleRate, float[] samples)
        {
            var stream = OR.CreateStream();
            stream.AcceptWaveform(sampleRate, samples);
            OR.Decode(stream);
            return stream;
        }
    }
}