Learn.VideoAnalysis/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs

133 lines
5.3 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar.IOC;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
public static class FunASRNanoExpand
{
    /// <summary>
    /// Registers the Fun-ASR-Nano speech-to-text service in the dependency-injection container.
    /// </summary>
    /// <param name="services">Service collection that receives the singleton registrations.</param>
    public static void AddFunASRNanoExpand(this IServiceCollection services)
    {
        // Kept from the original implementation so callers that relied on this
        // method to make SenseVoice resolvable keep working.
        services.AddSingleton<SenseVoice>();
        // The original only registered SenseVoice, so FunASRNano itself could not
        // be resolved after calling the extension that is named after it.
        services.AddSingleton<FunASRNano>();
    }
}
/// <summary>
/// Speech recognizer for Fun-ASR-Nano-2512, accessed through the sherpa-onnx platform.
/// <para>Version: Fun-ASR-Nano-2512</para>
/// <para>Source: https://github.com/modelscope/FunASR/blob/main/README_zh.md</para>
/// <para>
/// The underlying recognizer is stored in a static field, so it is shared by all
/// instances of this class. <see cref="Init"/> must be called before
/// <see cref="SoundHandle"/> (and therefore before either RunTask overload) is used.
/// </para>
/// </summary>
public class FunASRNano
{
    // Shared recognizer; assigned by Init.
    static OfflineRecognizer OR = default!;
    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// Creates the recognizer wrapper.
    /// </summary>
    /// <param name="redisManager">Not used by this class; kept so the constructor signature stays unchanged.</param>
    /// <param name="serviceProvider">Provider used to resolve <c>SherpaVad</c> when a task is run.</param>
    public FunASRNano(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Builds the recognizer configuration and creates the shared FunASRNano recognizer.
    /// </summary>
    /// <param name="numThreads">Number of threads passed to the model configuration (default 6).</param>
    /// <param name="useGPU">
    /// When true the "cuda" provider is selected, otherwise "cpu". If GPU start-up fails, check the CUDA
    /// environment setup: <see href="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/>
    /// </param>
    /// <param name="useHotwords">Currently not read by this method; kept for signature compatibility.</param>
    public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
    {
        Console.WriteLine("初始化 FunASRNano");
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();

        // Sample rate.
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension used when the model was trained.
        config.FeatConfig.FeatureDim = 80;

        var topFolder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-funasr-nano-fp16-2025-12-30");

        // Model files, all located under the model folder.
        config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
        // Large language model used by the recognizer.
        config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
        config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
        // Tokenizer.
        config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");

        // Prompts and generation settings.
        config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
        config.ModelConfig.FunAsrNano.UserPrompt = "这是一趟中国的课堂视频音频,请你帮我分析出它讲述的内容!";
        config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
        config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
        config.ModelConfig.FunAsrNano.TopP = 0.8f;
        config.ModelConfig.FunAsrNano.Seed = 42;

        // Model type is left empty.
        config.ModelConfig.ModelType = string.Empty;
        config.ModelConfig.NumThreads = numThreads;

        // The original condition was inverted (`!useGPU` selected "cuda"), which
        // chose CUDA for the default CPU request and CPU when a GPU was requested.
        config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";
#if DEBUG
        config.ModelConfig.Debug = 1;
#endif
        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Produces speech subtitles for the WAVE audio contained in a stream.
    /// </summary>
    /// <param name="s">Stream handed to <c>WaveReader</c>.</param>
    /// <returns>The list returned by <c>SherpaVad.TaskHandle</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public List<SenseVoiceRes> RunTask(Stream s)
    {
        // ArgumentNullException derives from Exception, so existing catch blocks still match.
        if (s is null) throw new ArgumentNullException(nameof(s), "音频路径 is null");

        var vad = serviceProvider.GetRequiredService<SherpaVad>();
        return vad.TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
    }

    /// <summary>
    /// Produces speech subtitles for the "task.wav" file found in the local path of a task.
    /// </summary>
    /// <param name="task">Task identifier; its local folder is obtained through <c>LocalPath()</c>.</param>
    /// <returns>
    /// An already-completed task: the work runs synchronously before the method returns,
    /// and the value returned by <c>SherpaVad.TaskHandle</c> is not propagated.
    /// </returns>
    /// <exception cref="FileNotFoundException">The "task.wav" file does not exist.</exception>
    public Task RunTask(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), "task.wav");
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
        {
            // FileNotFoundException derives from Exception, so existing catch blocks still match.
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        var vad = serviceProvider.GetRequiredService<SherpaVad>();
        vad.TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Decodes one block of audio samples with the shared recognizer.
    /// </summary>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <param name="samples">Audio sample values.</param>
    /// <returns>The stream that was decoded; the caller reads the result from it.</returns>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called yet.</exception>
    public OfflineStream SoundHandle(int sampleRate, float[] samples)
    {
        // Take a local snapshot so both calls below use the same recognizer, and
        // fail with a clear message instead of a NullReferenceException.
        var recognizer = OR
            ?? throw new InvalidOperationException("FunASRNano is not initialized; call Init before decoding.");

        var stream = recognizer.CreateStream();
        stream.AcceptWaveform(sampleRate, samples);
        recognizer.Decode(stream);
        return stream;
    }
}
}