新增 语音转录的测试
This commit is contained in:
parent
a8ec291497
commit
1adeba007c
|
|
@ -36,7 +36,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class FunASRNano
|
public class FunASRNano
|
||||||
{
|
{
|
||||||
static OfflineRecognizer OR = default!;
|
public static OfflineRecognizer OR = default!;
|
||||||
private readonly IServiceProvider serviceProvider;
|
private readonly IServiceProvider serviceProvider;
|
||||||
|
|
||||||
public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider)
|
public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider)
|
||||||
|
|
@ -70,7 +70,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
|
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
|
||||||
//提示词
|
//提示词
|
||||||
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
|
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
|
||||||
config.ModelConfig.FunAsrNano.UserPrompt = "这是一趟中国的课堂视频音频,请你帮我分析出它讲述的内容!";
|
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!";
|
||||||
config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
|
config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
|
||||||
config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
|
config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
|
||||||
config.ModelConfig.FunAsrNano.TopP = 0.8f;
|
config.ModelConfig.FunAsrNano.TopP = 0.8f;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
using Microsoft.Extensions.DependencyInjection;
|
using Dm.util;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using SherpaOnnx;
|
using SherpaOnnx;
|
||||||
using SqlSugar.IOC;
|
using SqlSugar.IOC;
|
||||||
|
|
@ -31,10 +32,13 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
}
|
}
|
||||||
public class SenseVoice
|
public class SenseVoice
|
||||||
{
|
{
|
||||||
static OfflineRecognizer OR = default!;
|
public static OfflineRecognizer OR = default!;
|
||||||
|
|
||||||
private readonly IServiceProvider serviceProvider;
|
private readonly IServiceProvider serviceProvider;
|
||||||
|
|
||||||
|
public static OfflineRecognizer OR1 = default!;
|
||||||
|
//测试用
|
||||||
|
public static List<(string z1,string z2)> cachedValue = new List<(string z1, string z2)>();
|
||||||
|
|
||||||
|
|
||||||
public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
|
public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
|
||||||
{
|
{
|
||||||
|
|
@ -90,6 +94,29 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
config.ModelConfig.Debug = 1;
|
config.ModelConfig.Debug = 1;
|
||||||
#endif
|
#endif
|
||||||
OR = new OfflineRecognizer(config);
|
OR = new OfflineRecognizer(config);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
|
||||||
|
OfflineRecognizerConfig config1 = new OfflineRecognizerConfig();
|
||||||
|
config1.FeatConfig.SampleRate = 16000;
|
||||||
|
config1.FeatConfig.FeatureDim = 80;
|
||||||
|
config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
|
||||||
|
config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
|
||||||
|
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
|
||||||
|
config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
|
||||||
|
config1.ModelConfig.SenseVoice.Language = "zh";
|
||||||
|
config1.ModelConfig.ModelType = string.Empty;
|
||||||
|
config1.ModelConfig.NumThreads = numThreads;
|
||||||
|
config1.ModelConfig.Provider = "cpu";
|
||||||
|
config1.DecodingMethod = "greedy_search";
|
||||||
|
config1.ModelConfig.Debug = 1;
|
||||||
|
OR1 = new OfflineRecognizer(config: config1);
|
||||||
|
//OR1 = FunASRNano.OR;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
@ -99,7 +126,6 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
public List<SenseVoiceRes> RunTask(Stream s)
|
public List<SenseVoiceRes> RunTask(Stream s)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (s is null) throw new Exception("音频路径 is null");
|
if (s is null) throw new Exception("音频路径 is null");
|
||||||
if (OR is null) Init();
|
if (OR is null) Init();
|
||||||
return serviceProvider.GetRequiredService<SherpaVad>()
|
return serviceProvider.GetRequiredService<SherpaVad>()
|
||||||
|
|
@ -121,6 +147,8 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
|
|
||||||
return Task.CompletedTask;
|
return Task.CompletedTask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 获取语音字幕
|
/// 获取语音字幕
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
@ -135,7 +163,5 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -158,7 +158,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
//如果携带任务ID
|
//如果携带任务ID
|
||||||
if (!string.IsNullOrEmpty(task))
|
if (!string.IsNullOrEmpty(task))
|
||||||
{
|
{
|
||||||
_ = redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count);
|
_ = redisManager.AddTaskLog(task, "==>字幕数量" + res.Count);
|
||||||
var captionsStr = res.ToJson();
|
var captionsStr = res.ToJson();
|
||||||
_ = serviceProvider.GetRequiredService<Repository<VideoTask>>()
|
_ = serviceProvider.GetRequiredService<Repository<VideoTask>>()
|
||||||
.AsUpdateable()
|
.AsUpdateable()
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ using MapsterMapper;
|
||||||
using Microsoft.AspNetCore.Authorization;
|
using Microsoft.AspNetCore.Authorization;
|
||||||
using Microsoft.AspNetCore.Http;
|
using Microsoft.AspNetCore.Http;
|
||||||
using Microsoft.AspNetCore.Mvc;
|
using Microsoft.AspNetCore.Mvc;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
using SqlSugar;
|
using SqlSugar;
|
||||||
using System;
|
using System;
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
|
|
@ -149,21 +150,32 @@ namespace VideoAnalysisCore.Controllers
|
||||||
public IActionResult AudioRecognition(IFormFile file)
|
public IActionResult AudioRecognition(IFormFile file)
|
||||||
{
|
{
|
||||||
using var s = file.OpenReadStream();
|
using var s = file.OpenReadStream();
|
||||||
var res = senseVoice.RunTask(s);
|
senseVoice.RunTask(s);
|
||||||
s.Position = 0;
|
return Ok();
|
||||||
var res1 = funASRNano.RunTask(s);
|
}
|
||||||
|
/// <summary>
|
||||||
|
/// 语音识别
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="file">文件流</param>
|
||||||
|
/// <returns></returns>
|
||||||
|
[HttpPost(Name = "AudioRecognition_test")]
|
||||||
|
public IActionResult AudioRecognition_test(IFormFile file)
|
||||||
|
{
|
||||||
|
using var s = file.OpenReadStream();
|
||||||
|
|
||||||
for (int i = 0; i < res.Count(); i++)
|
var x = AppCommon.Services.GetService<FunASRNano>();
|
||||||
|
x.Init();
|
||||||
|
senseVoice.RunTask(s);
|
||||||
|
for (int i = 0; i < SenseVoice.cachedValue.Count(); i++)
|
||||||
{
|
{
|
||||||
Console.WriteLine($"第{res[i].Start}秒");
|
Console.WriteLine($"字幕索引=>{i}");
|
||||||
Console.WriteLine($"ssv=> {res[i].Text}");
|
Console.WriteLine($"ssv=>{SenseVoice.cachedValue[i].z1}");
|
||||||
Console.WriteLine($"fun=> {res1[i].Text}");
|
Console.WriteLine($"fun=>{SenseVoice.cachedValue[i].z2}");
|
||||||
Console.WriteLine();
|
Console.WriteLine();
|
||||||
}
|
}
|
||||||
return Ok(res);
|
return Ok();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 获取FTS_Data str
|
/// 获取FTS_Data str
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue