新增 语音转录的测试

This commit is contained in:
小肥羊 2026-01-15 10:26:58 +08:00
parent a8ec291497
commit 1adeba007c
4 changed files with 58 additions and 20 deletions

View File

@ -36,7 +36,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// </summary>
public class FunASRNano
{
static OfflineRecognizer OR = default!;
public static OfflineRecognizer OR = default!;
private readonly IServiceProvider serviceProvider;
public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider)
@ -70,7 +70,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
//提示词
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
config.ModelConfig.FunAsrNano.UserPrompt = "这是一中国的课堂视频音频,请你帮我分析出它讲述的内容!";
config.ModelConfig.FunAsrNano.UserPrompt = "这是一中国的课堂视频音频,请你帮我分析出它讲述的内容!";
config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
config.ModelConfig.FunAsrNano.TopP = 0.8f;

View File

@ -1,4 +1,5 @@
using Microsoft.Extensions.DependencyInjection;
using Dm.util;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar.IOC;
@ -31,10 +32,13 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
}
public class SenseVoice
{
static OfflineRecognizer OR = default!;
public static OfflineRecognizer OR = default!;
private readonly IServiceProvider serviceProvider;
// Second recognizer instance, held separately from OR; it is assigned where the recognizers are constructed.
public static OfflineRecognizer OR1 = default!;
// For testing only: list of string pairs (z1, z2). The AudioRecognition_test endpoint prints z1 under the "ssv" label and z2 under the "fun" label.
// NOTE(review): public static mutable state; nothing visible here clears it — confirm it is not meant to outlive test runs.
public static List<(string z1,string z2)> cachedValue = new List<(string z1, string z2)>();
public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
{
@ -90,6 +94,29 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.ModelConfig.Debug = 1;
#endif
OR = new OfflineRecognizer(config);
// Build a second recognizer (OR1) from the model folder named below; the recognizer
// already assigned to OR is not modified by this section.
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
OfflineRecognizerConfig config1 = new OfflineRecognizerConfig();
config1.FeatConfig.SampleRate = 16000;
config1.FeatConfig.FeatureDim = 80;
// The token table and the ONNX model are both resolved under AppCommon.AIModelFile/<version folder>.
config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
// 1 = apply inverse text normalization to the SenseVoice output [controls punctuation generation].
config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
config1.ModelConfig.SenseVoice.Language = "zh";
config1.ModelConfig.ModelType = string.Empty;
// numThreads is declared outside the visible hunk.
config1.ModelConfig.NumThreads = numThreads;
config1.ModelConfig.Provider = "cpu";
config1.DecodingMethod = "greedy_search";
// NOTE(review): the Debug = 1 on the first config is followed by an #endif, but no
// conditional-compilation guard is visible around this assignment — confirm that
// debug output is intended in every build configuration.
config1.ModelConfig.Debug = 1;
OR1 = new OfflineRecognizer(config: config1);
//OR1 = FunASRNano.OR;
}
/// <summary>
@ -99,7 +126,6 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// <returns></returns>
public List<SenseVoiceRes> RunTask(Stream s)
{
if (s is null) throw new Exception("音频路径 is null");
if (OR is null) Init();
return serviceProvider.GetRequiredService<SherpaVad>()
@ -121,6 +147,8 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
return Task.CompletedTask;
}
/// <summary>
/// 获取语音字幕
/// </summary>
@ -135,7 +163,5 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
return stream;
}
}
}

View File

@ -158,7 +158,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//如果携带任务ID
if (!string.IsNullOrEmpty(task))
{
_ = redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count);
_ = redisManager.AddTaskLog(task, "==>字幕数量" + res.Count);
var captionsStr = res.ToJson();
_ = serviceProvider.GetRequiredService<Repository<VideoTask>>()
.AsUpdateable()

View File

@ -4,6 +4,7 @@ using MapsterMapper;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.DependencyInjection;
using SqlSugar;
using System;
using System.Diagnostics;
@ -149,21 +150,32 @@ namespace VideoAnalysisCore.Controllers
public IActionResult AudioRecognition(IFormFile file)
{
using var s = file.OpenReadStream();
var res = senseVoice.RunTask(s);
s.Position = 0;
var res1 = funASRNano.RunTask(s);
for (int i = 0; i < res.Count(); i++)
senseVoice.RunTask(s);
return Ok();
}
/// <summary>
/// 语音识别
/// </summary>
/// <param name="file">文件流</param>
/// <returns></returns>
[HttpPost(Name = "AudioRecognition_test")]
public IActionResult AudioRecognition_test(IFormFile file)
{
Console.WriteLine($"第{res[i].Start}秒");
Console.WriteLine($"ssv=> {res[i].Text}");
Console.WriteLine($"fun=> {res1[i].Text}");
// Open the uploaded file as a stream; disposed when the enclosing scope ends.
using var s = file.OpenReadStream();
// NOTE(review): if AppCommon.Services is a standard IServiceProvider, GetService<T> returns null
// for an unregistered type and the x.Init() call below would throw — confirm FunASRNano is registered.
var x = AppCommon.Services.GetService<FunASRNano>();
// Init() is invoked on every request to this endpoint.
x.Init();
// The return value is discarded; the loop below reads the static SenseVoice.cachedValue list instead.
senseVoice.RunTask(s);
// NOTE(review): cachedValue is a public static list and is not cleared anywhere in this view,
// so entries may accumulate across requests — confirm.
for (int i = 0; i < SenseVoice.cachedValue.Count(); i++)
{
// Print the entry index, then the two strings of the pair (z1 labelled "ssv", z2 labelled "fun").
Console.WriteLine($"字幕索引=>{i}");
Console.WriteLine($"ssv=>{SenseVoice.cachedValue[i].z1}");
Console.WriteLine($"fun=>{SenseVoice.cachedValue[i].z2}");
Console.WriteLine();
}
return Ok(res);
return Ok();
}
/// <summary>
/// 获取FTS_Data str
/// </summary>