From d52504a3a0b65a0c72bb9bd137df1a6c3dda5e56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E8=82=A5=E7=BE=8A?= <1048382248@qq.com>
Date: Tue, 13 Jan 2026 17:42:07 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20=E5=B0=81=E8=A3=85vad,?=
=?UTF-8?q?=E6=8E=A5=E5=85=A5=20FunASRNano?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
VideoAnalysis/Program.cs | 1 +
.../AICore/SherpaOnnx/FunASRNano.cs | 132 +++++++++++
.../AICore/SherpaOnnx/SenseVoice.cs | 198 +++--------------
.../AICore/SherpaOnnx/SherpaVad.cs | 210 ++++++++++++++++++
.../Controllers/VideoTaskController.cs | 8 +-
VideoAnalysisCore/VideoAnalysisCore.csproj | 2 +-
6 files changed, 375 insertions(+), 176 deletions(-)
create mode 100644 VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs
create mode 100644 VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs
diff --git a/VideoAnalysis/Program.cs b/VideoAnalysis/Program.cs
index bddd625..a383bad 100644
--- a/VideoAnalysis/Program.cs
+++ b/VideoAnalysis/Program.cs
@@ -53,6 +53,7 @@ namespace Learn.VideoAnalysis
builder.Services.AddAlibabaCloudVod();
builder.Services.AddAliyunOSS();
builder.Services.AddSenseVoiceExpand();
+ builder.Services.AddSherpaVadExpand();
//builder.Services.AddSpeakerAI();
builder.Services.AddCoravel();
diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs b/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs
new file mode 100644
index 0000000..89d41da
--- /dev/null
+++ b/VideoAnalysisCore/AICore/SherpaOnnx/FunASRNano.cs
@@ -0,0 +1,132 @@
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Options;
+using SherpaOnnx;
+using SqlSugar.IOC;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using VideoAnalysisCore.Common;
+using VideoAnalysisCore.Model;
+using VideoAnalysisCore.Model.Enum;
+
+namespace VideoAnalysisCore.AICore.SherpaOnnx
+{
+ public static class FunASRNanoExpand
+ {
+ // NOTE(review): the generic type argument of AddSingleton is missing in this copy of the patch — presumably FunASRNano; confirm.
+ /// <summary>
+ /// Registers the Fun-ASR-Nano speech-to-text service as a singleton.
+ /// </summary>
+ /// <param name="services">Service collection that receives the registration.</param>
+ public static void AddFunASRNanoExpand(this IServiceCollection services)
+ {
+ services.AddSingleton();
+ }
+ }
+    /// <summary>
+    /// Speech recognizer for the Fun-ASR-Nano-2512 model, run through sherpa-onnx.
+    /// Audio is cut into speech segments by the VAD service; SoundHandle decodes each segment.
+    /// Model source: https://github.com/modelscope/FunASR/blob/main/README_zh.md
+    /// </summary>
+    public class FunASRNano
+    {
+        // One recognizer shared by every instance; assigned by Init.
+        static OfflineRecognizer OR = default!;
+        private readonly IServiceProvider serviceProvider;
+
+        // redisManager is not used by this class; the parameter is kept so existing callers keep working.
+        public FunASRNano( RedisManager redisManager, IServiceProvider serviceProvider)
+        {
+            this.serviceProvider = serviceProvider;
+        }
+
+        /// <summary>
+        /// Builds the recognizer configuration and creates the shared recognizer.
+        /// </summary>
+        /// <param name="numThreads">Inference thread count (default 6).</param>
+        /// <param name="useGPU">Use the CUDA provider when true, otherwise the CPU; a CUDA setup is required.</param>
+        /// <param name="useHotwords">Not read by this method.</param>
+        public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
+        {
+            Console.WriteLine("初始化 FunASRNano");
+            OfflineRecognizerConfig config = new OfflineRecognizerConfig();
+            // Input sample rate (Hz) and the feature dimension the model was trained with.
+            config.FeatConfig.SampleRate = 16000;
+            config.FeatConfig.FeatureDim = 80;
+            var topFolder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-funasr-nano-fp16-2025-12-30");
+
+            // Model files; every path below is resolved inside topFolder.
+            // Encoder adaptor model.
+            config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
+            // Large language model.
+            config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
+            // Embedding model.
+            config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
+            // Tokenizer directory.
+            config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
+            // Prompts and generation settings.
+            config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
+            config.ModelConfig.FunAsrNano.UserPrompt = "这是一趟中国的课堂视频音频,请你帮我分析出它讲述的内容!";
+            config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
+            config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
+            config.ModelConfig.FunAsrNano.TopP = 0.8f;
+            config.ModelConfig.FunAsrNano.Seed = 42;
+
+            // The model type is deliberately left empty.
+            config.ModelConfig.ModelType = string.Empty;
+            config.ModelConfig.NumThreads = numThreads;
+            // CUDA is selected only when the caller asks for the GPU.
+            config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";
+#if DEBUG
+            config.ModelConfig.Debug = 1;
+#endif
+            OR = new OfflineRecognizer(config);
+        }
+
+        /// <summary>
+        /// Transcribes a WAV stream: the VAD service segments it and SoundHandle decodes each segment.
+        /// </summary>
+        /// <param name="s">WAV audio stream; must not be null.</param>
+        /// <returns>The caption list produced by the VAD service's TaskHandle.</returns>
+        public List RunTask(Stream s)
+        {
+            if (s is null) throw new Exception("音频路径 is null");
+            return serviceProvider.GetRequiredService()
+                .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
+        }
+        /// <summary>
+        /// Transcribes the task.wav file located in the task's local folder.
+        /// </summary>
+        /// <param name="task">Task id; forwarded to TaskHandle so the captions are stored for this task.</param>
+        /// <returns>An already completed task; the work itself runs synchronously.</returns>
+        public Task RunTask(string task)
+        {
+            var filePath = Path.Combine(task.LocalPath(), "task.wav");
+            if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
+                throw new Exception("task 音频路径未找到");
+            serviceProvider.GetRequiredService()
+                .TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.silero_vad_v5);
+            return Task.CompletedTask;
+        }
+        /// <summary>
+        /// Decodes one block of samples; runs Init with its defaults first when no recognizer exists yet (not synchronized).
+        /// </summary>
+        /// <param name="sampleRate">Sample rate of the samples.</param>
+        /// <param name="samples">Audio samples to decode.</param>
+        /// <returns>The decoded stream; the caller is responsible for disposing it.</returns>
+        public OfflineStream SoundHandle(int sampleRate, float[] samples)
+        {
+            if (OR is null) Init();
+            var stream = OR.CreateStream();
+            stream.AcceptWaveform(sampleRate, samples);
+            OR.Decode(stream);
+            return stream;
+        }
+    }
+}
diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs
index 199534c..cbe1d0a 100644
--- a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs
+++ b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs
@@ -14,7 +14,6 @@ using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;
-using static System.Runtime.InteropServices.JavaScript.JSType;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
@@ -32,22 +31,18 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
}
public class SenseVoice
{
- //const string TransducerStr = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
- static OfflineRecognizer OR = default!;
- static OfflineRecognizer OR_old = default!;
- static VadModelConfig VADModelConfig = default!;
- public Repository videoTaskDB { get; set; }
+ static OfflineRecognizer OR = default!;
- private readonly RedisManager redisManager;
+ private readonly IServiceProvider serviceProvider;
- public SenseVoice(Repository videoTaskDB, RedisManager redisManager)
+
+ public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
{
- this.videoTaskDB = videoTaskDB;
- this.redisManager = redisManager;
+ this.serviceProvider = serviceProvider;
}
///
- /// 初始化 SenseVoice
+ /// 初始化 SenseVoice
///
/// 默认6线程
/// 是否使用gpu 报错请看安装CUDA环境
@@ -61,10 +56,9 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.FeatConfig.FeatureDim = 80;
// Path to tokens.txt
var AIModelVersion_270717 = "sherpa-onnx-sense-voice-24-07-17";
- var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
- config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
+ config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "tokens.txt");
//SenseVoice 模型
- config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
+ config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "model.onnx");
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
//反转文本规范化规则 fst 的路径
@@ -91,54 +85,11 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//config.MaxActivePaths =4;
#endregion
- #region 热词功能[无效]
- //if (false)
- //{
- // //热词目录
- // config.HotwordsFile = Path.Combine(AppCommon.AIModelFile, "Hotwords.txt");
- // config.DecodingMethod = "modified_beam_search";
- // //热词得分
- // config.HotwordsScore = 1.5f;
-
- // config.ModelConfig.ModelingUnit = "cjkchar+bpe";
- // config.ModelConfig.BpeVocab = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "bpe.model");
- // config.ModelConfig.Transducer = new OfflineTransducerModelConfig()
- // {
- // Decoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "decoder-epoch-99-avg-1.onnx"),
- // Encoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "encoder-epoch-99-avg-1.onnx"),
- // Joiner = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "joiner-epoch-99-avg-1.onnx"),
- // };
- //}
- #endregion
-
#if DEBUG
config.ModelConfig.Debug = 1;
#endif
-
OR = new OfflineRecognizer(config);
-
-
- OfflineRecognizerConfig oldConfig = new OfflineRecognizerConfig();
- //采样率
- oldConfig.FeatConfig.SampleRate = 16000;
- oldConfig.FeatConfig.FeatureDim = 80;
- oldConfig.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "tokens.txt");
- oldConfig.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "model.onnx");
- oldConfig.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
- //反转文本规范化规则 fst 的路径
- //config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");
-
- oldConfig.ModelConfig.SenseVoice.Language = "zh";
- //模型类型
- oldConfig.ModelConfig.ModelType = string.Empty;
- oldConfig.ModelConfig.NumThreads = numThreads;
- oldConfig.ModelConfig.Provider = "cpu";
- OR_old = new OfflineRecognizer(oldConfig);
-
- VADModelConfig = new VadModelConfig();
- VADModelConfig.SileroVad.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "silero_vad.onnx");
- VADModelConfig.Debug = 0;
}
///
@@ -146,137 +97,42 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
///
///
///
- public async Task> RunTask(Stream s)
+ public List RunTask(Stream s)
{
- if (s is null)
- throw new Exception("音频路径 is null");
- return await TaskHandle(new WaveReader(s), null);
+ if (s is null) throw new Exception("音频路径 is null");
+ return serviceProvider.GetRequiredService()
+ .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
}
-
///
/// 获取语音字幕
///
///
///
- public async Task RunTask(string task)
+ public Task RunTask(string task)
{
var filePath = Path.Combine(task.LocalPath(), "task.wav");
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
throw new Exception("task 音频路径未找到");
- await TaskHandle(new WaveReader(filePath), task);
- }
+ serviceProvider.GetRequiredService()
+ .TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.silero_vad_v5); // forward the task id: TaskHandle only stores captions when it is non-empty
- ///
- /// 任务处理
- ///
- /// Wave
- /// 任务id [默认Null]
- ///
- ///
- public async Task> TaskHandle(WaveReader reader, string? task )
- {
- if (OR is null)
- Init();
- int numSamples = reader.Samples.Length;
- int windowSize = VADModelConfig.SileroVad.WindowSize;
- int sampleRate = VADModelConfig.SampleRate;
- int numIter = numSamples / windowSize;
- var totalSecond = numSamples / (float)sampleRate;
- var res = new List(500);
- using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 20);
- for (int i = 0; i != numIter; ++i)
- {
- int start = i * windowSize;
- float[] samples = new float[windowSize];
- Array.Copy(reader.Samples, start, samples, 0, windowSize);
- VAD.AcceptWaveform(samples);
-
- //Memory samples = new float[windowSize];
- //Memory sourceSpan = reader.Samples.AsMemory(start, windowSize);
- //sourceSpan.CopyTo(samples);
- //VAD.AcceptWaveform(samples.ToArray());
-
- //是否检测到语音
- if (VAD.IsSpeechDetected())
- {
- //获取最新的发言片段
- while (!VAD.IsEmpty())
- {
- var p = await ReadNext(VAD,res, totalSecond);
- if (p != null) redisManager.SetTaskProgress(task, p + "%");
- }
- }
- }
- VAD.Flush();
- while (!VAD.IsEmpty())
- {
- var p = await ReadNext(VAD, res, totalSecond);
- if(p!= null) redisManager.SetTaskProgress(task, p + "%");
- }
- //如果携带任务ID
- if (!string.IsNullOrEmpty(task))
- {
- await redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count);
- var captionsStr = res.ToJson();
- await videoTaskDB.AsUpdateable()
- .SetColumns(it => it.Captions == captionsStr)
- .Where(it => it.Id == long.Parse(task))
- .ExecuteCommandAsync();
- await redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
- //分析完成视频字幕后继续接收任务
- //redisManager.NewTask();
- }
- return res;
+ return Task.CompletedTask;
}
///
- /// 处理vad 下一个切片
+ /// 获取语音字幕
///
- ///
- /// 字幕处理后写入数组
- /// 总时长
- /// 任务回调
- ///
- public async Task ReadNext(VoiceActivityDetector VAD, List res, float totalSecond)
+ /// 采样率
+ /// 采样值(样品)
+ /// 结果流
+ public OfflineStream SoundHandle(int sampleRate, float[] samples)
{
- var segment = VAD.Front();
- var sampleRate = VADModelConfig.SampleRate;
- var sampleRateF = (float)VADModelConfig.SampleRate;
- float startTime = segment.Start / sampleRateF;
- float duration = segment.Samples.Length / sampleRateF;
- using var stream = OR.CreateStream();
- stream.AcceptWaveform(sampleRate, segment.Samples);
+ var stream = OR.CreateStream();
+ stream.AcceptWaveform(sampleRate, samples);
OR.Decode(stream);
-
- //old
- using var stream1 = OR_old.CreateStream();
- stream1.AcceptWaveform(sampleRate, segment.Samples);
- OR.Decode(stream1);
- if (stream.Result.Text != stream1.Result.Text)
- {
- Console.WriteLine("=>" + (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero));
- Console.WriteLine("新=>" + stream.Result.Text);
- Console.WriteLine("旧=>" + stream1.Result.Text);
- }
- Console.WriteLine();
- double? resP =null;
- if (!string.IsNullOrEmpty(stream.Result.Text))
- {
- var text = stream.Result.Text.Trim();
- if (text.Length == 1 && text == "。")// 检查字符是否只有一个句号
- {
- VAD.Pop();
- return resP;
- }
- res.Add(new()
- {
- Text = stream.Result.Text,
- Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
- End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
- });
- resP = Math.Round((double)(startTime + duration) / (totalSecond) * 100, 2);
- }
- VAD.Pop();
- return resP;
+ return stream;
}
+
+
+
}
}
diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs
new file mode 100644
index 0000000..7175942
--- /dev/null
+++ b/VideoAnalysisCore/AICore/SherpaOnnx/SherpaVad.cs
@@ -0,0 +1,210 @@
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Options;
+using SherpaOnnx;
+using SqlSugar;
+using SqlSugar.IOC;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using VideoAnalysisCore.Common;
+using VideoAnalysisCore.Model;
+using VideoAnalysisCore.Model.Enum;
+using static System.Net.WebRequestMethods;
+
+namespace VideoAnalysisCore.AICore.SherpaOnnx
+{
+ public static class SherpaVadExpand
+ {
+ // NOTE(review): the generic type argument of AddTransient is missing in this copy of the patch — presumably SherpaVad; confirm.
+ /// <summary>
+ /// Registers the VAD speech-segmentation service with a transient lifetime.
+ /// </summary>
+ /// <param name="services">Service collection that receives the registration.</param>
+ public static void AddSherpaVadExpand(this IServiceCollection services)
+ {
+ services.AddTransient();
+ }
+ }
+
+ /// <summary>
+ /// Model file names of the VAD versions the speech-segmentation service can use.
+ /// </summary>
+ public class SherpaVadVersion
+ {
+ public const string silero_vad_v4 = "silero_vad_v4.onnx";
+ public const string silero_vad_v5 = "silero_vad_v5.onnx";
+ /// <summary>
+ /// ten_vad (the 324 KB build).
+ /// </summary>
+ public const string ten_vad_324 = "ten-vad.onnx";
+ }
+    /// <summary>
+    /// Voice-activity-detection (VAD) service: cuts audio into speech segments and passes each one to a recognizer callback.
+    /// The model family (Silero or TEN) is selected per call through the vadVersion argument of TaskHandle.
+    /// </summary>
+    public class SherpaVad
+    {
+        // Per-instance configuration. It used to be a static field that every constructor call replaced,
+        // so instances running at the same time could overwrite each other's settings.
+        private VadModelConfig VADModelConfig;
+
+        private readonly RedisManager redisManager;
+        private readonly IServiceProvider serviceProvider;
+        // Recognizer callback (sample rate, samples) stored by Init and invoked for every speech segment.
+        private Func Callback = default!;
+
+        public SherpaVad(RedisManager redisManager, IServiceProvider serviceProvider)
+        {
+            this.redisManager = redisManager;
+            this.serviceProvider = serviceProvider;
+            VADModelConfig = new VadModelConfig();
+
+            VADModelConfig.SampleRate = 16000;
+            VADModelConfig.NumThreads = 1;
+            VADModelConfig.Provider = "cpu";
+#if DEBUG
+            VADModelConfig.Debug = 1;
+#endif
+            VADModelConfig.SileroVad = new SileroVadModelConfig();
+            VADModelConfig.TenVad = new TenVadModelConfig();
+        }
+
+        /// <summary>
+        /// Applies the per-call VAD settings and stores the recognizer callback.
+        /// </summary>
+        /// <param name="func">Callback invoked with (sample rate, samples) for each detected speech segment.</param>
+        /// <param name="vadVersion">Model file name; one of the SherpaVadVersion constants.</param>
+        /// <param name="numThreads">Thread count for the VAD model (default 1).</param>
+        /// <param name="useGPU">Use the CUDA provider when true, otherwise the CPU; a CUDA setup is required.</param>
+        private void Init(Func func, string vadVersion = SherpaVadVersion.silero_vad_v5, int numThreads = 1, bool useGPU = false)
+        {
+            VADModelConfig.NumThreads = numThreads;
+            VADModelConfig.Provider = useGPU? "cuda" : "cpu";
+            // Load the file that belongs to the requested version instead of always loading silero_vad_v5.
+            var path = Path.Combine(AppCommon.AIModelFile, "vad", vadVersion);
+            switch (vadVersion)
+            {
+                case SherpaVadVersion.silero_vad_v4:
+                case SherpaVadVersion.silero_vad_v5:
+                    VADModelConfig.SileroVad.Model = path;
+                    break;
+                case SherpaVadVersion.ten_vad_324:
+                    VADModelConfig.TenVad.Model = path;
+                    break;
+                default:
+                    // Fail early rather than continue with no model configured.
+                    throw new ArgumentOutOfRangeException(nameof(vadVersion), vadVersion, "Unknown VAD model version.");
+            }
+            Callback = func;
+        }
+
+        /// <summary>
+        /// Runs VAD over the whole recording, decodes every speech segment through func and collects the captions.
+        /// When task is non-empty the captions are also written to the task log, the database and Redis.
+        /// </summary>
+        /// <param name="reader">Decoded WAV data.</param>
+        /// <param name="task">Task id, or null to skip the log, database and Redis writes.</param>
+        /// <param name="func">Recognizer callback invoked with (sample rate, samples) for each segment.</param>
+        /// <param name="vadVersion">Model file name; one of the SherpaVadVersion constants.</param>
+        /// <param name="numThreads">Thread count for the VAD model (default 1).</param>
+        /// <param name="useGPU">Use the CUDA provider when true, otherwise the CPU.</param>
+        /// <returns>The captions in the order their segments were detected.</returns>
+        public List TaskHandle(WaveReader reader, string? task,Func func, string vadVersion = SherpaVadVersion.silero_vad_v5, int numThreads = 1, bool useGPU = false )
+        {
+            Init(func, vadVersion, numThreads, useGPU);
+            float[] allSamples = reader.Samples;
+            int numSamples = allSamples.Length;
+            // Each model family carries its own window size; use the one that belongs to the selected model.
+            int windowSize = vadVersion == SherpaVadVersion.ten_vad_324
+                ? VADModelConfig.TenVad.WindowSize
+                : VADModelConfig.SileroVad.WindowSize;
+            int sampleRate = VADModelConfig.SampleRate;
+            int numIter = numSamples / windowSize;
+            var totalSecond = numSamples / (float)sampleRate;
+            var res = new List(500);
+
+            using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 30);
+
+            // Decodes every segment currently queued in the detector and reports the progress after each one.
+            void DrainSegments()
+            {
+                while (!VAD.IsEmpty())
+                {
+                    var p = ReadNext(VAD, res, totalSecond);
+                    if (p != null) redisManager.SetTaskProgress(task, p + "%");
+                }
+            }
+
+            // One buffer is reused for every window so nothing is allocated inside the loop.
+            float[] buffer = new float[windowSize];
+            for (int i = 0; i != numIter; ++i)
+            {
+                allSamples.AsSpan(i * windowSize, windowSize).CopyTo(buffer);
+                VAD.AcceptWaveform(buffer);
+                if (VAD.IsSpeechDetected())
+                    DrainSegments();
+            }
+
+            // Samples after the last full window are not fed to the detector; flush what it still holds.
+            VAD.Flush();
+            DrainSegments();
+
+            // Persist the result only when a task id was supplied.
+            if (!string.IsNullOrEmpty(task))
+            {
+                // NOTE(review): the three operations below are started but not awaited, so their failures go unobserved.
+                // NOTE(review): the log text names SenseVoice although other recognizers also run through this method.
+                _ = redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count);
+                var captionsStr = res.ToJson();
+                _ = serviceProvider.GetRequiredService>()
+                    .AsUpdateable()
+                    .SetColumns(it => it.Captions == captionsStr)
+                    .Where(it => it.Id == long.Parse(task))
+                    .ExecuteCommandAsync();
+                _ = redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
+                // Picking up the next task once captioning has finished is currently disabled:
+                //redisManager.NewTask();
+            }
+            return res;
+        }
+        /// <summary>
+        /// Decodes the segment at the front of the detector queue, appends its caption to res and removes it from the queue.
+        /// </summary>
+        /// <param name="VAD">Detector whose queue holds at least one segment.</param>
+        /// <param name="res">List that receives the caption.</param>
+        /// <param name="totalSecond">Total length of the recording in seconds.</param>
+        /// <returns>Progress in percent after this segment, or null when the segment produced no caption.</returns>
+        public double? ReadNext(VoiceActivityDetector VAD, List res, float totalSecond)
+        {
+            var segment = VAD.Front();
+            int sampleRate = VADModelConfig.SampleRate;
+            // Segment positions are counted in samples; convert them to seconds.
+            float startTime = segment.Start / (float)sampleRate;
+            float duration = segment.Samples.Length / (float)sampleRate;
+            // The stream returned by the callback is owned here and disposed when the method returns.
+            using var stream = Callback(sampleRate, segment.Samples);
+            // Read the recognized text once and reuse it below.
+            var rawText = stream.Result.Text;
+            double? resP = null;
+            // Skip empty results and results that consist of a lone full stop.
+            if (!string.IsNullOrEmpty(rawText) && rawText.Trim() != "。")
+            {
+                res.Add(new()
+                {
+                    Text = rawText,
+                    Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
+                    End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
+                });
+                resP = Math.Round((double)(startTime + duration) / (totalSecond) * 100, 2);
+            }
+            VAD.Pop();
+            return resP;
+        }
+    }
+}
diff --git a/VideoAnalysisCore/Controllers/VideoTaskController.cs b/VideoAnalysisCore/Controllers/VideoTaskController.cs
index 473e3dc..614c208 100644
--- a/VideoAnalysisCore/Controllers/VideoTaskController.cs
+++ b/VideoAnalysisCore/Controllers/VideoTaskController.cs
@@ -129,7 +129,7 @@ namespace VideoAnalysisCore.Controllers
using HttpClient client = new HttpClient();
// 发送GET请求获取网络文件流
using var networkStream = await client.GetStreamAsync(url);
- var res = await senseVoice.RunTask(networkStream);
+ var res = senseVoice.RunTask(networkStream);
return Ok(res);
}
catch (Exception ex)
@@ -143,11 +143,11 @@ namespace VideoAnalysisCore.Controllers
/// 文件流
///
[HttpPost(Name = "AudioRecognition")]
- public async Task AudioRecognition(IFormFile file)
+ public IActionResult AudioRecognition(IFormFile file)
{
using var s = file.OpenReadStream();
- var res = await senseVoice.RunTask(s);
- return Ok(res);
+ var res = senseVoice.RunTask(s);
+ return Ok(res);
}
diff --git a/VideoAnalysisCore/VideoAnalysisCore.csproj b/VideoAnalysisCore/VideoAnalysisCore.csproj
index d7b668b..ccfab38 100644
--- a/VideoAnalysisCore/VideoAnalysisCore.csproj
+++ b/VideoAnalysisCore/VideoAnalysisCore.csproj
@@ -71,7 +71,7 @@
-
+