Compare commits

..

No commits in common. "374c582cd1a179ef38f30c726d13c53a755080fe" and "b5e174e683f1c0cab9a7d27dc4129a6fd46e16bd" have entirely different histories.

4 changed files with 146 additions and 97 deletions

View File

@ -69,24 +69,16 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|| s.Depth == 2)) || s.Depth == 2))
.Select(s => s.Name).ToArrayAsync(); .Select(s => s.Name).ToArrayAsync();
string title = taskInfo.MediaName; string title = taskInfo.MediaName;
var speakerArr = JsonSerializer.Deserialize<OfflineSpeakerRes[]>(taskInfo.Speaker);
var captionsArr = JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.Captions);
var fileNameResFormat = "{授课章节: string|null}"; var fileNameResFormat = "{授课章节: string|null}";
//var fileNamePostMessages = title +
// " 这是一堂课的标题,请你基于标题帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." +
// $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." +
// $"输出格式 json字符串 对象格式{fileNameResFormat}";
var fileNamePostMessages = title + var fileNamePostMessages = title +
" 这是一堂课的部分授课字幕,请你基于字幕内容帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." + " 这是一堂课的标题,请你基于标题帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." +
$"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." + $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." +
$"输出格式 json字符串 对象格式{fileNameResFormat}"; $"输出格式 json字符串 对象格式{fileNameResFormat}";
var fileNameInfoRes = await ChatAsync<FileNameInfo> var fileNameInfoRes = await ChatAsync<FileNameInfo>
(task, fileNamePostMessages, null);//, "deepseek-chat"); (task, fileNamePostMessages, null);//, "deepseek-chat");
var speakerArr = JsonSerializer.Deserialize<OfflineSpeakerRes[]>(taskInfo.Speaker);
var captionsArr = JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.Captions);
var captions = ExpandFunction.GetSpeakerCaptions(captionsArr, speakerArr); var captions = ExpandFunction.GetSpeakerCaptions(captionsArr, speakerArr);
var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0; var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0;
var criteriaBuilder = new StringBuilder(); var criteriaBuilder = new StringBuilder();

View File

@ -17,7 +17,7 @@ using static System.Runtime.InteropServices.JavaScript.JSType;
namespace VideoAnalysisCore.AICore.SherpaOnnx namespace VideoAnalysisCore.AICore.SherpaOnnx
{ {
public static class SenseVoice public class SenseVoice
{ {
const string TransducerStr = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"; const string TransducerStr = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
static OfflineRecognizer OR =default!; static OfflineRecognizer OR =default!;
@ -104,16 +104,80 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// <returns></returns> /// <returns></returns>
public static async Task<List<SenseVoiceRes>> RunTask(Stream s) public static async Task<List<SenseVoiceRes>> RunTask(Stream s)
{ {
if (OR is null)
Init();
if (s is null) if (s is null)
throw new Exception("音频路径 is null"); throw new Exception("音频路径 is null");
return await TaskHandle(new WaveReader(s));
WaveReader reader = new WaveReader(s);
int numSamples = reader.Samples.Length;
int windowSize = VADModelConfig.SileroVad.WindowSize;
int sampleRate = VADModelConfig.SampleRate;
int numIter = numSamples / windowSize;
var totalSecond = numSamples / (float)sampleRate;
var res = new List<SenseVoiceRes>(500);
//缓冲区大小
var VAD = new VoiceActivityDetector(VADModelConfig, 60);
//var VAD = new VoiceActivityDetector(VADModelConfig, 60);
for (int i = 0; i != numIter; ++i)
{
int start = i * windowSize;
float[] samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize);
VAD.AcceptWaveform(samples);
//是否检测到语音
if (VAD.IsSpeechDetected())
{
while (!VAD.IsEmpty())
{
//获取最新的发言片段
SpeechSegment segment = VAD.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
using OfflineStream stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
OR.Decode(stream);
if (!string.IsNullOrEmpty(stream.Result.Text))
{
res.Add(new()
{
Text = stream.Result.Text,
Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
});
}
VAD.Pop();
}
}
}
VAD.Flush();
while (!VAD.IsEmpty())
{
SpeechSegment segment = VAD.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
OR.Decode(stream);
if (!string.IsNullOrEmpty(stream.Result.Text))
{
res.Add(new()
{
Text = stream.Result.Text,
Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
});
} }
VAD.Pop();
}
VAD.Reset();
return res;
}
/// <summary> /// <summary>
/// 获取语音字幕 /// 获取语音字幕
/// </summary> /// </summary>
@ -121,57 +185,82 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// <returns></returns> /// <returns></returns>
public static async Task RunTask(string task) public static async Task RunTask(string task)
{ {
if (OR is null)
Init();
var filePath = Path.Combine(task.LocalPath(), task + ".wav"); var filePath = Path.Combine(task.LocalPath(), task + ".wav");
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath)) if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
throw new Exception("task 音频路径未找到"); throw new Exception("task 音频路径未找到");
await TaskHandle(new WaveReader(filePath), task);
}
/// <summary> WaveReader reader = new WaveReader(filePath);
/// 任务处理
/// </summary>
/// <param name="reader">Wave</param>
/// <param name="task">任务id [默认Null]</param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
public static async Task<List<SenseVoiceRes>> TaskHandle(WaveReader reader, string? task = null)
{
if (OR is null)
Init();
int numSamples = reader.Samples.Length; int numSamples = reader.Samples.Length;
int windowSize = VADModelConfig.SileroVad.WindowSize; int windowSize = VADModelConfig.SileroVad.WindowSize;
int sampleRate = VADModelConfig.SampleRate; int sampleRate = VADModelConfig.SampleRate;
int numIter = numSamples / windowSize; int numIter = numSamples / windowSize;
var totalSecond = numSamples / (float)sampleRate; var totalSecond = numSamples / (float)sampleRate;
var res = new List<SenseVoiceRes>(500); var res = new List<SenseVoiceRes>(500);
using var VAD = new VoiceActivityDetector(VADModelConfig, 30); var VAD = new VoiceActivityDetector(VADModelConfig, 60);
for (int i = 0; i != numIter; ++i) for (int i = 0; i != numIter; ++i)
{ {
int start = i * windowSize; int start = i * windowSize;
float[] samples = new float[windowSize]; float[] samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize); Array.Copy(reader.Samples, start, samples, 0, windowSize);
VAD.AcceptWaveform(samples); VAD.AcceptWaveform(samples);
//Memory<float> samples = new float[windowSize];
//Memory<float> sourceSpan = reader.Samples.AsMemory(start, windowSize);
//sourceSpan.CopyTo(samples);
//VAD.AcceptWaveform(samples.ToArray());
//是否检测到语音 //是否检测到语音
if (VAD.IsSpeechDetected()) if (VAD.IsSpeechDetected())
{ {
//获取最新的发言片段
while (!VAD.IsEmpty()) while (!VAD.IsEmpty())
await VAD.ReadNext(res, totalSecond, task); {
//获取最新的发言片段
SpeechSegment segment = VAD.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
OR.Decode(stream);
if (!string.IsNullOrEmpty(stream.Result.Text))
{
res.Add(new()
{
Text = stream.Result.Text,
//Text = ExpandFunction.HandleFormula(stream.Result.Text),
Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
});
var progress = (float)(startTime + duration) / (totalSecond) * 100;
RedisExpand.SetTaskProgress(task, progress);
}
VAD.Pop();
}
} }
} }
VAD.Flush(); VAD.Flush();
while (!VAD.IsEmpty()) while (!VAD.IsEmpty())
await VAD.ReadNext(res, totalSecond, task);
//如果携带任务ID
if (!string.IsNullOrEmpty(task))
{ {
SpeechSegment segment = VAD.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;
OfflineStream stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
OR.Decode(stream);
if (!string.IsNullOrEmpty(stream.Result.Text))
{
res.Add(new()
{
Text = stream.Result.Text,
//Text = ExpandFunction.HandleFormula(stream.Result.Text),
Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
});
}
VAD.Pop();
}
Console.WriteLine(DateTime.Now + "=> SenseVoice 字幕数量"+ res.Count); Console.WriteLine(DateTime.Now + "=> SenseVoice 字幕数量"+ res.Count);
var captionsStr = JsonSerializer.Serialize(res); var captionsStr = JsonSerializer.Serialize(res);
await DbScoped.Sugar await DbScoped.Sugar
.Updateable<VideoTask>() .Updateable<VideoTask>()
@ -184,45 +273,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
RedisExpand.NewTask(); RedisExpand.NewTask();
RedisExpand.InsertChannel(RedisChannelEnum.ChatModelAnalysis, task); RedisExpand.InsertChannel(RedisChannelEnum.ChatModelAnalysis, task);
}
return res;
}
/// <summary>
/// 处理vad 下一个切片
/// </summary>
/// <param name="VAD"></param>
/// <param name="res">字幕处理后写入数组</param>
/// <param name="totalSecond">总时长</param>
/// <param name="task">所属任务id</param>
/// <returns></returns>
public static async Task ReadNext(this VoiceActivityDetector VAD, List<SenseVoiceRes> res, float totalSecond, string? task = null)
{
var segment = VAD.Front();
var sampleRate = VADModelConfig.SampleRate;
var sampleRateF = (float)VADModelConfig.SampleRate;
float startTime = segment.Start / sampleRateF;
float duration = segment.Samples.Length / sampleRateF;
using var stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
OR.Decode(stream);
if (!string.IsNullOrEmpty(stream.Result.Text))
{
var text = stream.Result.Text.Trim();
if (text.Length == 1 && text.First() >= '\uFF00' && text.First() <= '\uFFEF') // 检查字符是否在全角半角字符集的标点符号范围内
{
VAD.Pop();
return;
}
res.Add(new()
{
Text = stream.Result.Text,
Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
});
if (!string.IsNullOrEmpty(task))
RedisExpand.SetTaskProgress(task, (double)(startTime + duration) / (totalSecond) * 100);
}
VAD.Pop();
} }
} }
} }

View File

@ -9,7 +9,6 @@ using VideoAnalysisCore.Model;
using System.Text.Json; using System.Text.Json;
using VideoAnalysisCore.Model.Enum; using VideoAnalysisCore.Model.Enum;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using UserCenter.Model.Enum;
namespace VideoAnalysisCore.AICore.SherpaOnnx namespace VideoAnalysisCore.AICore.SherpaOnnx
{ {

View File

@ -14,12 +14,19 @@ namespace VideoAnalysisCore.Common
{ {
public class ExceptionFilter : IAsyncExceptionFilter public class ExceptionFilter : IAsyncExceptionFilter
{ {
public ExceptionFilter() readonly NavigationManager nm;
public ExceptionFilter(NavigationManager nm)
{ {
this.nm = nm;
} }
public async Task OnExceptionAsync(ExceptionContext context) public async Task OnExceptionAsync(ExceptionContext context)
{ {
if (context.Exception is InvalidOperationException)
{
nm.NavigateTo("/Login");
}
else
{ {
// 创建一个包含错误信息的对象 // 创建一个包含错误信息的对象
var errorObject = new var errorObject = new