// 133 lines · 5.3 KiB · C#
using Microsoft.Extensions.DependencyInjection;
|
||
using Microsoft.Extensions.Options;
|
||
using SherpaOnnx;
|
||
using SqlSugar.IOC;
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using System.Diagnostics;
|
||
using System.IO;
|
||
using System.Linq;
|
||
using System.Text;
|
||
using System.Text.Json;
|
||
using System.Text.RegularExpressions;
|
||
using System.Threading.Tasks;
|
||
using VideoAnalysisCore.Common;
|
||
using VideoAnalysisCore.Model;
|
||
using VideoAnalysisCore.Model.Enum;
|
||
|
||
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||
{
|
||
public static class FunASRNanoExpand
{
    /// <summary>
    /// Registers the Fun-ASR-Nano speech-to-text service with the container.
    /// </summary>
    /// <param name="services">Service collection that receives the singleton registrations.</param>
    public static void AddFunASRNanoExpand(this IServiceCollection services)
    {
        // Kept so callers that relied on this method to register SenseVoice still resolve it.
        services.AddSingleton<SenseVoice>();
        // The recognizer this extension is named after; it was previously never registered here.
        services.AddSingleton<FunASRNano>();
    }
}
|
||
/// <summary>
/// Fun-ASR-Nano-2512 speech recognizer accessed through the sherpa-onnx platform.
/// <para>Version: Fun-ASR-Nano-2512</para>
/// <para>Source: https://github.com/modelscope/FunASR/blob/main/README_zh.md</para>
/// </summary>
public class FunASRNano
{
    // Static, so one recognizer is shared by every instance; assigned in Init, read in SoundHandle.
    static OfflineRecognizer OR = default!;
    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// Creates the wrapper and stores the service provider for later lookups.
    /// </summary>
    /// <param name="redisManager">Accepted from dependency injection; not used by this class.</param>
    /// <param name="serviceProvider">Provider used to resolve <c>SherpaVad</c> when a task is run.</param>
    public FunASRNano(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Builds the Fun-ASR-Nano recognizer configuration and creates the shared recognizer.
    /// Must be called before <see cref="SoundHandle"/>.
    /// </summary>
    /// <param name="numThreads">Number of threads passed to the model configuration (default 6).</param>
    /// <param name="useGPU">
    /// When true the "cuda" provider is selected, otherwise "cpu". If CUDA fails to load, see the
    /// CUDA setup notes: <see href="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/>
    /// </param>
    /// <param name="useHotwords">Currently not read by this method.</param>
    public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
    {
        Console.WriteLine("初始化 FunASRNano");
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        // Sample rate
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension the model was trained with
        config.FeatConfig.FeatureDim = 80;
        var topFolder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-funasr-nano-fp16-2025-12-30");

        // Model files, all located under the model folder
        config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
        // The large language model
        config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
        config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
        // Tokenizer
        config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
        // Prompts
        config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
        config.ModelConfig.FunAsrNano.UserPrompt = "这是一趟中国的课堂视频音频,请你帮我分析出它讲述的内容!";
        config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
        config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
        config.ModelConfig.FunAsrNano.TopP = 0.8f;
        config.ModelConfig.FunAsrNano.Seed = 42;

        // Model type
        config.ModelConfig.ModelType = string.Empty;
        config.ModelConfig.NumThreads = numThreads;
        // Use the GPU provider only when requested (the original condition was inverted).
        config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";
#if DEBUG
        config.ModelConfig.Debug = 1;
#endif
        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Produces subtitles for the audio in a stream by handing a <c>WaveReader</c> over it to
    /// <c>SherpaVad.TaskHandle</c>, with <see cref="SoundHandle"/> as the per-segment callback.
    /// </summary>
    /// <param name="s">Stream containing the wave audio.</param>
    /// <returns>The list returned by <c>SherpaVad.TaskHandle</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    public List<SenseVoiceRes> RunTask(Stream s)
    {
        if (s is null)
        {
            throw new ArgumentNullException(nameof(s), "音频路径 is null");
        }

        return serviceProvider.GetRequiredService<SherpaVad>()
            .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
    }

    /// <summary>
    /// Produces subtitles for a task by processing the <c>task.wav</c> file located under the
    /// path returned by <c>task.LocalPath()</c>. The work runs synchronously and the value
    /// returned by <c>SherpaVad.TaskHandle</c> is not used here.
    /// </summary>
    /// <param name="task">Task identifier used to locate the audio file.</param>
    /// <returns>An already-completed task.</returns>
    /// <exception cref="FileNotFoundException">The task's audio file does not exist.</exception>
    public Task RunTask(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), "task.wav");
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        serviceProvider.GetRequiredService<SherpaVad>()
            .TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.silero_vad_v5);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Decodes one block of samples with the shared recognizer.
    /// </summary>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <param name="samples">Audio sample values.</param>
    /// <returns>The stream after decoding, from which the caller reads the result.</returns>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
    public OfflineStream SoundHandle(int sampleRate, float[] samples)
    {
        if (OR is null)
        {
            // Fail with a clear message instead of a NullReferenceException on the next line.
            throw new InvalidOperationException("FunASRNano is not initialized; call Init first.");
        }

        var stream = OR.CreateStream();
        stream.AcceptWaveform(sampleRate, samples);
        OR.Decode(stream);
        return stream;
    }
}
|
||
}
|