Learn.VideoAnalysis/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs

93 lines
3.5 KiB
C#

using Microsoft.Extensions.Options;
using SherpaOnnx;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using VideoAnalysisCore.Common;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
/// <summary>
/// Offline speech recognition helper built on the sherpa-onnx SenseVoice model.
/// <see cref="Init"/> builds the shared recognizer; <see cref="RunTask"/> decodes
/// one task's WAV file with it and prints the result to the console.
/// </summary>
public class SenseVoice
{
    /// <summary>
    /// Name of the folder under <c>AppCommon.AIModelFile</c> that holds
    /// <c>model.onnx</c> and <c>tokens.txt</c>.
    /// </summary>
    private const string ModelDirectoryName = "sherpa-onnx-sense-voice-24-07-17";

    /// <summary>
    /// Shared recognizer used by <see cref="RunTask"/>; remains null until
    /// <see cref="Init"/> has been called.
    /// </summary>
    static OfflineRecognizer OR = default!;

    /// <summary>
    /// Initializes SenseVoice by building the shared offline recognizer.
    /// </summary>
    /// <param name="speakerNumber">Not used by this method; kept so existing callers keep compiling.</param>
    /// <param name="threshold">Not used by this method; kept so existing callers keep compiling.</param>
    public static void Init(int speakerNumber = 0, double threshold = 0.6)
    {
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();

        // Sample rate.
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension used to train the model.
        config.FeatConfig.FeatureDim = 80;

        // Path to tokens.txt.
        config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, ModelDirectoryName, "tokens.txt");
        // SenseVoice model file.
        config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, ModelDirectoryName, "model.onnx");
        // 1 = apply inverse text normalization to the SenseVoice output.
        config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
        // Model type (left empty).
        config.ModelConfig.ModelType = string.Empty;

        // Decoding method: "greedy_search" or "modified_beam_search".
        // Greedy search is used here. When switching to "modified_beam_search",
        // config.MaxActivePaths (e.g. 4) sets how many active paths are kept
        // during the search; it is only used by that method.
        config.DecodingMethod = "greedy_search";

        // Hotwords file (none).
        config.HotwordsFile = string.Empty;
        // Hotwords score.
        config.HotwordsScore = 1.5f;
        // Path to the inverse text normalization rule FSTs (none).
        config.RuleFsts = string.Empty;
        config.ModelConfig.Debug = 0;

        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Recognizes the speech in a task's WAV file and writes the text, tokens
    /// and (when available) timestamps to the console.
    /// </summary>
    /// <param name="task">
    /// Task identifier. The audio is read from <c>task.LocalPath()</c> combined
    /// with <c>task + ".wav"</c>.
    /// </param>
    /// <returns>A task that has already finished when it is returned; the work runs synchronously.</returns>
    /// <exception cref="ArgumentException"><paramref name="task"/> is null or empty.</exception>
    /// <exception cref="FileNotFoundException">The task's WAV file does not exist.</exception>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called yet.</exception>
    public static async Task RunTask(string task)
    {
        // Validate the identifier itself: a combined path ending in ".wav" is
        // never empty, so checking the path for emptiness could not catch this.
        if (string.IsNullOrEmpty(task))
        {
            throw new ArgumentException("task 音频路径未找到", nameof(task));
        }

        var filePath = Path.Combine(task.LocalPath(), task + ".wav");
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        // Fail with a clear message instead of a NullReferenceException when
        // the recognizer has not been created.
        var recognizer = OR;
        if (recognizer is null)
        {
            throw new InvalidOperationException("SenseVoice 未初始化，请先调用 Init");
        }

        // The stream wraps a native handle; dispose it deterministically
        // rather than leaving it to the finalizer.
        using OfflineStream stream = recognizer.CreateStream();

        WaveReader waveReader = new WaveReader(filePath);
        stream.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
        recognizer.Decode(stream);

        var r = stream.Result;
        Console.WriteLine("--------------------");
        Console.WriteLine("Text: {0}", r.Text);
        Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
        if (r.Timestamps != null && r.Timestamps.Length > 0)
        {
            var formatted = string.Join(", ", r.Timestamps.Select(t => t.ToString("0.00")));
            Console.WriteLine("Timestamps: [{0}]", formatted);
        }

        // Keeps the method async so failures above surface through the returned Task.
        await Task.CompletedTask;
    }
}
}