using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar.IOC;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
/// <summary>
/// Dependency-injection registration helpers for the SenseVoice speech-to-text service.
/// </summary>
public static class SenseVoiceExpand
{
    /// <summary>
    /// Registers <see cref="SenseVoice"/> (speech-to-text) in the service collection.
    /// </summary>
    /// <param name="services">The service collection that receives the registration.</param>
    public static void AddSenseVoiceExpand(this IServiceCollection services)
    {
        // Singleton lifetime: the container hands out one shared SenseVoice instance.
        services.AddSingleton<SenseVoice>();
    }
}
/// <summary>
/// Speech-to-text service built on a sherpa-onnx <see cref="OfflineRecognizer"/>
/// that is configured with a SenseVoice model.
/// </summary>
public class SenseVoice
{
    // Recognizer shared by all instances; it is only assigned inside Init,
    // so Init must run before SoundHandle is used.
    static OfflineRecognizer OR = default!;
    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="redisManager">Accepted from dependency injection; not stored or used by this constructor.</param>
    /// <param name="serviceProvider">Provider used by the RunTask overloads to resolve the service whose TaskHandle processes the audio.</param>
    public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Initializes SenseVoice: builds the recognizer configuration and creates the shared recognizer.
    /// </summary>
    /// <param name="numThreads">Number of threads passed to the model configuration; defaults to 6.</param>
    /// <param name="useGPU">
    /// <c>true</c> selects the "cuda" provider, <c>false</c> selects "cpu".
    /// If initialization fails with the GPU enabled, check the CUDA installation.
    /// </param>
    /// <param name="useHotwords">Not read by this method.</param>
    public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
    {
        Console.WriteLine("初始化 SenseVoice");

        // Directory that holds tokens.txt and model.onnx for this model release.
        var modelDirectory = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-sense-voice-24-07-17");

        OfflineRecognizerConfig config = new OfflineRecognizerConfig();

        // Sample rate.
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension the model was trained with.
        config.FeatConfig.FeatureDim = 80;

        // Path to tokens.txt.
        config.ModelConfig.Tokens = Path.Combine(modelDirectory, "tokens.txt");
        // SenseVoice model file.
        config.ModelConfig.SenseVoice.Model = Path.Combine(modelDirectory, "model.onnx");
        // 1 = apply inverse text normalization to the output (controls punctuation generation).
        config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
        // Path to an inverse-text-normalization rule FST (currently disabled):
        //config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");
        config.ModelConfig.SenseVoice.Language = "zh";

        // Model type.
        config.ModelConfig.ModelType = string.Empty;
        config.ModelConfig.NumThreads = numThreads;

        // Use CUDA only when the caller asked for the GPU; otherwise stay on the CPU.
        config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";

        #region Valid decoding methods
        // Either greedy search [greedy_search] or modified beam search [modified_beam_search].
        // Greedy search:
        config.DecodingMethod = "greedy_search";
        //// Modified beam search:
        //config.DecodingMethod = "modified_beam_search";
        //// Only used when the decoding method is modified_beam_search;
        //// it sets how many active paths are kept during the search.
        //config.MaxActivePaths =4;
        #endregion

#if DEBUG
        config.ModelConfig.Debug = 1;
#endif

        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Produces speech subtitles from a WAV audio stream.
    /// </summary>
    /// <param name="s">Audio stream; it is wrapped in a <c>WaveReader</c>.</param>
    /// <returns>The list returned by the resolved service's TaskHandle call.</returns>
    /// <exception cref="Exception">Thrown when <paramref name="s"/> is null.</exception>
    // NOTE(review): no generic type arguments are visible on List / GetRequiredService in this
    // method — confirm the intended element and service types.
    public List RunTask(Stream s)
    {
        if (s is null)
        {
            throw new Exception("音频路径 is null");
        }

        // SoundHandle is passed as the callback that decodes the samples handed over by TaskHandle.
        return serviceProvider.GetRequiredService()
            .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
    }

    /// <summary>
    /// Produces speech subtitles for a task whose audio is stored as "task.wav"
    /// under the path returned by <c>task.LocalPath()</c>.
    /// </summary>
    /// <param name="task">Task identifier used to resolve the local working path.</param>
    /// <returns>An already-completed task; the work runs synchronously and the TaskHandle result is not returned.</returns>
    /// <exception cref="Exception">Thrown when the audio file does not exist.</exception>
    // NOTE(review): no generic type argument is visible on GetRequiredService here — confirm the
    // intended service type.
    public Task RunTask(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), "task.wav");
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
        {
            throw new Exception("task 音频路径未找到");
        }

        serviceProvider.GetRequiredService()
            .TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Decodes one block of audio samples with the shared recognizer.
    /// </summary>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <param name="samples">Audio sample values.</param>
    /// <returns>The stream after decoding, from which the caller reads the result.</returns>
    public OfflineStream SoundHandle(int sampleRate, float[] samples)
    {
        var stream = OR.CreateStream();
        stream.AcceptWaveform(sampleRate, samples);
        OR.Decode(stream);
        return stream;
    }
}
}