163 lines
6.4 KiB
C#
163 lines
6.4 KiB
C#
using Dm.util;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Options;
|
|
using SherpaOnnx;
|
|
using SqlSugar.IOC;
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using VideoAnalysisCore.Common;
|
|
using VideoAnalysisCore.Model;
|
|
using VideoAnalysisCore.Model.Enum;
|
|
|
|
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|
{
|
|
/// <summary>
/// Dependency-injection extensions for the SenseVoice speech-to-text service.
/// </summary>
public static class SenseVoiceExpand
{
    /// <summary>
    /// Registers <see cref="SenseVoice"/> (speech-to-text) in the container as a singleton.
    /// </summary>
    /// <param name="services">Service collection that receives the registration.</param>
    public static void AddSenseVoiceExpand(this IServiceCollection services)
        => services.AddSingleton<SenseVoice>();
}
|
|
/// <summary>
/// Speech-to-text helper that wraps a sherpa-onnx <see cref="OfflineRecognizer"/>
/// configured for the SenseVoice model and feeds it audio segments produced by
/// <c>SherpaVad</c>.
/// </summary>
public class SenseVoice
{
    /// <summary>
    /// Shared recognizer instance. Created by <see cref="Init"/>; the <c>RunTask</c>
    /// overloads call <see cref="Init"/> with default arguments when it is still null.
    /// </summary>
    public static OfflineRecognizer OR = default!;

    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// Secondary recognizer slot. Nothing in this class assigns it: the code that
    /// built it from the "sherpa-onnx-sense-voice-funasr-nano-2025-12-17" model
    /// directory (same settings as <see cref="OR"/>, CPU provider) is disabled.
    /// </summary>
    public static OfflineRecognizer OR1 = default!;

    // For testing only.
    public static List<(string z1, string z2)> cachedValue = new List<(string z1, string z2)>();

    /// <summary>
    /// Creates the service.
    /// </summary>
    /// <param name="redisManager">Not used by this class; kept so the constructor signature stays unchanged.</param>
    /// <param name="serviceProvider">Provider used to resolve <c>SherpaVad</c> when a task runs.</param>
    public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Initializes the SenseVoice recognizer and stores it in <see cref="OR"/>.
    /// </summary>
    /// <param name="numThreads">Number of threads passed to the model configuration. Defaults to 6.</param>
    /// <param name="useGPU">
    /// When true the "cuda" provider is selected, otherwise "cpu". If GPU initialization fails,
    /// check the CUDA environment setup:
    /// <see href="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/>
    /// </param>
    /// <param name="useHotwords">Currently not read by this method.</param>
    public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
    {
        Console.WriteLine("初始化 SenseVoice");
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();

        // Sample rate.
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension the model was trained with.
        config.FeatConfig.FeatureDim = 80;

        // Directory that holds tokens.txt and model.onnx for this model version.
        var modelDir = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-sense-voice-24-07-17");
        config.ModelConfig.Tokens = Path.Combine(modelDir, "tokens.txt");
        // SenseVoice model file.
        config.ModelConfig.SenseVoice.Model = Path.Combine(modelDir, "model.onnx");
        // 1 = apply inverse text normalization (controls punctuation generation).
        config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
        // Optional inverse-text-normalization rule FST (disabled):
        //config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");

        config.ModelConfig.SenseVoice.Language = "zh";
        // Model type.
        config.ModelConfig.ModelType = string.Empty;
        config.ModelConfig.NumThreads = numThreads;

        // Fix: the original condition was inverted (`if (!useGPU)` selected "cuda"),
        // so CUDA was requested exactly when the caller asked for CPU and vice versa.
        config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";

        // Decoding method: "greedy_search" or "modified_beam_search".
        // MaxActivePaths (number of active paths kept during the search) is only
        // used with "modified_beam_search"; it is not set here.
        config.DecodingMethod = "greedy_search";

#if DEBUG
        config.ModelConfig.Debug = 1;
#endif
        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Produces subtitles for the audio contained in a stream.
    /// </summary>
    /// <param name="s">Stream handed to <c>WaveReader</c>.</param>
    /// <returns>The list returned by <c>SherpaVad.TaskHandle</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public List<SenseVoiceRes> RunTask(Stream s)
    {
        if (s is null)
        {
            throw new ArgumentNullException(nameof(s), "音频路径 is null");
        }

        EnsureInitialized();

        var vad = serviceProvider.GetRequiredService<SherpaVad>();
        return vad.TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
    }

    /// <summary>
    /// Produces subtitles for a task's audio file. The file is expected at
    /// <c>task.LocalPath()</c> combined with "task.wav".
    /// </summary>
    /// <param name="task">Task identifier; also forwarded to <c>SherpaVad.TaskHandle</c>.</param>
    /// <returns>A completed task; all work is done synchronously before returning.</returns>
    /// <exception cref="FileNotFoundException">The task's wav file does not exist.</exception>
    public Task RunTask(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), "task.wav");
        // File.Exists already returns false for null or empty paths.
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        EnsureInitialized();

        var vad = serviceProvider.GetRequiredService<SherpaVad>();
        vad.TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Decodes one block of samples with <see cref="OR"/>.
    /// </summary>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <param name="samples">Audio sample values.</param>
    /// <returns>The stream after decoding.</returns>
    public OfflineStream SoundHandle(int sampleRate, float[] samples)
    {
        var stream = OR.CreateStream();
        stream.AcceptWaveform(sampleRate, samples);
        OR.Decode(stream);
        return stream;
    }

    // Creates the recognizer with default settings if Init has not been called yet.
    private void EnsureInitialized()
    {
        if (OR is null)
        {
            Init();
        }
    }
}
|
|
}
|