新增 语音转录的测试

This commit is contained in:
小肥羊 2026-01-15 10:26:58 +08:00
parent a8ec291497
commit 1adeba007c
4 changed files with 58 additions and 20 deletions

View File

@ -36,7 +36,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// </summary> /// </summary>
public class FunASRNano public class FunASRNano
{ {
static OfflineRecognizer OR = default!; public static OfflineRecognizer OR = default!;
private readonly IServiceProvider serviceProvider; private readonly IServiceProvider serviceProvider;
public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider) public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider)
@ -70,7 +70,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B"); config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
//提示词 //提示词
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant."; config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
config.ModelConfig.FunAsrNano.UserPrompt = "这是一中国的课堂视频音频,请你帮我分析出它讲述的内容!"; config.ModelConfig.FunAsrNano.UserPrompt = "这是一中国的课堂视频音频,请你帮我分析出它讲述的内容!";
config.ModelConfig.FunAsrNano.MaxNewTokens = 512; config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
config.ModelConfig.FunAsrNano.Temperature = 1E-06f; config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
config.ModelConfig.FunAsrNano.TopP = 0.8f; config.ModelConfig.FunAsrNano.TopP = 0.8f;

View File

@ -1,4 +1,5 @@
using Microsoft.Extensions.DependencyInjection; using Dm.util;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using SherpaOnnx; using SherpaOnnx;
using SqlSugar.IOC; using SqlSugar.IOC;
@ -31,10 +32,13 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
} }
public class SenseVoice public class SenseVoice
{ {
static OfflineRecognizer OR = default!; public static OfflineRecognizer OR = default!;
private readonly IServiceProvider serviceProvider; private readonly IServiceProvider serviceProvider;
public static OfflineRecognizer OR1 = default!;
//测试用
public static List<(string z1,string z2)> cachedValue = new List<(string z1, string z2)>();
public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider) public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
{ {
@ -90,6 +94,29 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.ModelConfig.Debug = 1; config.ModelConfig.Debug = 1;
#endif #endif
OR = new OfflineRecognizer(config); OR = new OfflineRecognizer(config);
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
OfflineRecognizerConfig config1 = new OfflineRecognizerConfig();
config1.FeatConfig.SampleRate = 16000;
config1.FeatConfig.FeatureDim = 80;
config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
config1.ModelConfig.SenseVoice.Language = "zh";
config1.ModelConfig.ModelType = string.Empty;
config1.ModelConfig.NumThreads = numThreads;
config1.ModelConfig.Provider = "cpu";
config1.DecodingMethod = "greedy_search";
config1.ModelConfig.Debug = 1;
OR1 = new OfflineRecognizer(config: config1);
//OR1 = FunASRNano.OR;
} }
/// <summary> /// <summary>
@ -99,7 +126,6 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// <returns></returns> /// <returns></returns>
public List<SenseVoiceRes> RunTask(Stream s) public List<SenseVoiceRes> RunTask(Stream s)
{ {
if (s is null) throw new Exception("音频路径 is null"); if (s is null) throw new Exception("音频路径 is null");
if (OR is null) Init(); if (OR is null) Init();
return serviceProvider.GetRequiredService<SherpaVad>() return serviceProvider.GetRequiredService<SherpaVad>()
@ -121,6 +147,8 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
return Task.CompletedTask; return Task.CompletedTask;
} }
/// <summary> /// <summary>
/// 获取语音字幕 /// 获取语音字幕
/// </summary> /// </summary>
@ -135,7 +163,5 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
return stream; return stream;
} }
} }
} }

View File

@ -158,7 +158,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//如果携带任务ID //如果携带任务ID
if (!string.IsNullOrEmpty(task)) if (!string.IsNullOrEmpty(task))
{ {
_ = redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count); _ = redisManager.AddTaskLog(task, "==>字幕数量" + res.Count);
var captionsStr = res.ToJson(); var captionsStr = res.ToJson();
_ = serviceProvider.GetRequiredService<Repository<VideoTask>>() _ = serviceProvider.GetRequiredService<Repository<VideoTask>>()
.AsUpdateable() .AsUpdateable()

View File

@ -4,6 +4,7 @@ using MapsterMapper;
using Microsoft.AspNetCore.Authorization; using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc; using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.DependencyInjection;
using SqlSugar; using SqlSugar;
using System; using System;
using System.Diagnostics; using System.Diagnostics;
@ -149,21 +150,32 @@ namespace VideoAnalysisCore.Controllers
public IActionResult AudioRecognition(IFormFile file) public IActionResult AudioRecognition(IFormFile file)
{ {
using var s = file.OpenReadStream(); using var s = file.OpenReadStream();
var res = senseVoice.RunTask(s); senseVoice.RunTask(s);
s.Position = 0; return Ok();
var res1 = funASRNano.RunTask(s); }
/// <summary>
for (int i = 0; i < res.Count(); i++) /// 语音识别
/// </summary>
/// <param name="file">文件流</param>
/// <returns></returns>
[HttpPost(Name = "AudioRecognition_test")]
public IActionResult AudioRecognition_test(IFormFile file)
{ {
Console.WriteLine($"第{res[i].Start}秒"); using var s = file.OpenReadStream();
Console.WriteLine($"ssv=> {res[i].Text}");
Console.WriteLine($"fun=> {res1[i].Text}"); var x = AppCommon.Services.GetService<FunASRNano>();
x.Init();
senseVoice.RunTask(s);
for (int i = 0; i < SenseVoice.cachedValue.Count(); i++)
{
Console.WriteLine($"字幕索引=>{i}");
Console.WriteLine($"ssv=>{SenseVoice.cachedValue[i].z1}");
Console.WriteLine($"fun=>{SenseVoice.cachedValue[i].z2}");
Console.WriteLine(); Console.WriteLine();
} }
return Ok(res); return Ok();
} }
/// <summary> /// <summary>
/// 获取FTS_Data str /// 获取FTS_Data str
/// </summary> /// </summary>