调试 FunASR的STT,修复流程上的bug
This commit is contained in:
parent
eed63794b8
commit
d948f854fb
|
|
@ -18,7 +18,9 @@ namespace Learn.VideoAnalysis.Expand
|
||||||
|
|
||||||
Console.WriteLine($"{DateTime.Now}=>初始化 Coravel");
|
Console.WriteLine($"{DateTime.Now}=>初始化 Coravel");
|
||||||
service.AddScheduler();
|
service.AddScheduler();
|
||||||
|
#if !DEBUG
|
||||||
service.AddTransient<TaskFileClearJob>();
|
service.AddTransient<TaskFileClearJob>();
|
||||||
|
#endif
|
||||||
service.AddTransient<NodePackageJob>();
|
service.AddTransient<NodePackageJob>();
|
||||||
}
|
}
|
||||||
public static void UseCoravelExpand(this IApplicationBuilder provider)
|
public static void UseCoravelExpand(this IApplicationBuilder provider)
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,8 @@ namespace Learn.VideoAnalysis
|
||||||
AppCommon.Services = app.Services;
|
AppCommon.Services = app.Services;
|
||||||
app.UseMiddleware<BasicAuthMiddleware>("Swagger");
|
app.UseMiddleware<BasicAuthMiddleware>("Swagger");
|
||||||
// Configure the HTTP request pipeline.
|
// Configure the HTTP request pipeline.
|
||||||
_ = app.Services.GetRequiredService<RedisInit>();
|
//开启redis队列服务
|
||||||
|
//_ = app.Services.GetRequiredService<RedisInit>();
|
||||||
app.UseSwagger();
|
app.UseSwagger();
|
||||||
app.UseSwaggerUI();
|
app.UseSwaggerUI();
|
||||||
app.UseExceptionHandler("/Error");
|
app.UseExceptionHandler("/Error");
|
||||||
|
|
|
||||||
|
|
@ -156,8 +156,8 @@ function timeupdateVideo() {
|
||||||
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end
|
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end
|
||||||
);
|
);
|
||||||
// 更新字幕 AI优化字幕
|
// 更新字幕 AI优化字幕
|
||||||
let subtitleI1 = subtitles1.value.findIndex(
|
let subtitleI1 = subtitles1.value.findLastIndex(
|
||||||
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end
|
(subtitle) => currentTime >= subtitle.start
|
||||||
);
|
);
|
||||||
if (subtitleI > -1 && currentSubtitle.value !== subtitles.value[subtitleI].text) {
|
if (subtitleI > -1 && currentSubtitle.value !== subtitles.value[subtitleI].text) {
|
||||||
currentSubtitle.value = subtitles.value[subtitleI].text;
|
currentSubtitle.value = subtitles.value[subtitleI].text;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
using Newtonsoft.Json;
|
using Newtonsoft.Json;
|
||||||
using Newtonsoft.Json.Linq;
|
using Newtonsoft.Json.Linq;
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
|
|
@ -34,9 +34,10 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
||||||
}
|
}
|
||||||
public class VideoKnowPointDto
|
public class VideoKnowPointDto
|
||||||
{
|
{
|
||||||
public float KnowPointWeight { get; set; }
|
|
||||||
public string KnowPoint { get; set; }
|
public string KnowPoint { get; set; }
|
||||||
public string KnowPointId { get; set; }
|
public string KnowPointId { get; set; }
|
||||||
|
public float KnowSourceTime { get; set; }
|
||||||
|
public float KnowPointWeight { get; set; }
|
||||||
public string KnowSource { get; set; }
|
public string KnowSource { get; set; }
|
||||||
}
|
}
|
||||||
public class VideoKnowRes
|
public class VideoKnowRes
|
||||||
|
|
@ -65,10 +66,6 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public virtual string? KnowPoint { get; set; }
|
public virtual string? KnowPoint { get; set; }
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 知识点权重
|
|
||||||
/// </summary>
|
|
||||||
public virtual float? KnowPointWeight { get; set; }
|
|
||||||
/// <summary>
|
|
||||||
/// 知识点ID
|
/// 知识点ID
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public virtual string? KnowPointId { get; set; }
|
public virtual string? KnowPointId { get; set; }
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 视频分析工作流1
|
/// 视频分析工作流1
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public class GTP_Analysis_1 : IBserGPTWorkflow
|
public class GTP_Analysis_1 : IBserGPTWorkflow
|
||||||
{
|
{
|
||||||
private readonly GeminiGPTClient geminiClient;
|
private readonly GeminiGPTClient geminiClient;
|
||||||
private readonly DeepSeekGPTClient deepSeekClient;
|
private readonly DeepSeekGPTClient deepSeekClient;
|
||||||
|
|
@ -326,7 +326,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
||||||
var postMessages =
|
var postMessages =
|
||||||
$$"""
|
$$"""
|
||||||
# Role
|
# Role
|
||||||
你是一位{{subject}}学科的教育专家与资深校对。你的任务是将{{sections}}内容的原始语音识别(STT)JSON 数据清洗为高质量教学文本。
|
你是一位{{subject}}学科的教育专家,有着资深字幕校对经验。你的任务是将{{sections}}内容的原始语音识别(STT)JSON 数据清洗为高质量教学文本。
|
||||||
# Input & Output Protocol
|
# Input & Output Protocol
|
||||||
输入和输出均为严格的 JSON 数组格式:`[{"t": number, "r": string}]`。
|
输入和输出均为严格的 JSON 数组格式:`[{"t": number, "r": string}]`。
|
||||||
`t` (Time): 绝对锚点,代表时间戳。严禁修改、严禁排序、严禁删除。
|
`t` (Time): 绝对锚点,代表时间戳。严禁修改、严禁排序、严禁删除。
|
||||||
|
|
@ -434,7 +434,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
/// <summary>
|
||||||
|
/// 作业内容检查
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="taskInfo"></param>
|
||||||
|
/// <param name="captions"></param>
|
||||||
|
/// <param name="sections"></param>
|
||||||
|
/// <returns></returns>
|
||||||
private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections)
|
private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections)
|
||||||
{
|
{
|
||||||
if (captions is null || string.IsNullOrWhiteSpace(captions.Captions))
|
if (captions is null || string.IsNullOrWhiteSpace(captions.Captions))
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ using System.Text;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using UserCenter.Model.Enum;
|
||||||
using VideoAnalysisCore.Common;
|
using VideoAnalysisCore.Common;
|
||||||
using VideoAnalysisCore.Model;
|
using VideoAnalysisCore.Model;
|
||||||
using VideoAnalysisCore.Model.Enum;
|
using VideoAnalysisCore.Model.Enum;
|
||||||
|
|
@ -49,7 +50,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="numThreads">默认6线程</param>
|
/// <param name="numThreads">默认6线程</param>
|
||||||
/// <param name="useGPU">是否使用gpu 报错请看安装CUDA环境 <see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/></param>
|
/// <param name="useGPU">是否使用gpu 报错请看安装CUDA环境 <see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/></param>
|
||||||
public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
|
public void Init(SubjectEnum? subject = null, int numThreads = 10, bool useGPU = false, bool useHotwords = false)
|
||||||
{
|
{
|
||||||
Console.WriteLine("初始化 FunASRNano");
|
Console.WriteLine("初始化 FunASRNano");
|
||||||
OfflineRecognizerConfig config = new OfflineRecognizerConfig();
|
OfflineRecognizerConfig config = new OfflineRecognizerConfig();
|
||||||
|
|
@ -63,17 +64,23 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
//将非结构化数据(文本、图像、音频等)转换为低维稠密向量
|
//将非结构化数据(文本、图像、音频等)转换为低维稠密向量
|
||||||
config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
|
config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
|
||||||
//接入的大语言模型
|
//接入的大语言模型
|
||||||
config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
|
//config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder ,"llm.fp16.onnx");
|
||||||
|
config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "int8-2025-12-30", "llm.int8.onnx");
|
||||||
//插入预训练模型(如Transformer)的小型可训练模块 (如语音识别、情感分析)
|
//插入预训练模型(如Transformer)的小型可训练模块 (如语音识别、情感分析)
|
||||||
config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
|
config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
|
||||||
//分词器
|
//分词器
|
||||||
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
|
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
|
||||||
//提示词
|
//提示词
|
||||||
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
|
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
|
||||||
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!";
|
//加上学科为空的处理
|
||||||
|
if (subject != null)
|
||||||
|
config.ModelConfig.FunAsrNano.UserPrompt = $"这是一堂中国{subject}的课堂视频音频,请你帮我分析出它讲述的内容!";
|
||||||
|
else
|
||||||
|
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国课堂的视频音频,请你帮我分析出它讲述的内容!";
|
||||||
config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
|
config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
|
||||||
config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
|
config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
|
||||||
config.ModelConfig.FunAsrNano.TopP = 0.8f;
|
config.ModelConfig.FunAsrNano.TopP = 0.7f;
|
||||||
|
//种子
|
||||||
config.ModelConfig.FunAsrNano.Seed = 42;
|
config.ModelConfig.FunAsrNano.Seed = 42;
|
||||||
|
|
||||||
//模型类型
|
//模型类型
|
||||||
|
|
@ -81,10 +88,10 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
config.ModelConfig.NumThreads = numThreads;
|
config.ModelConfig.NumThreads = numThreads;
|
||||||
config.ModelConfig.Provider = "cpu";
|
config.ModelConfig.Provider = "cpu";
|
||||||
//需要使用GPU
|
//需要使用GPU
|
||||||
if (!useGPU)
|
if (useGPU)
|
||||||
config.ModelConfig.Provider = "cuda";
|
config.ModelConfig.Provider = "cuda";
|
||||||
#if DEBUG
|
#if DEBUG
|
||||||
config.ModelConfig.Debug = 1;
|
//config.ModelConfig.Debug = 1;
|
||||||
#endif
|
#endif
|
||||||
OR = new OfflineRecognizer(config);
|
OR = new OfflineRecognizer(config);
|
||||||
}
|
}
|
||||||
|
|
@ -106,15 +113,17 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="task"></param>
|
/// <param name="task"></param>
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
public Task RunTask(string task)
|
public Task RunTask(string task)
|
||||||
{
|
{
|
||||||
|
var taskInfo = serviceProvider.GetRequiredService<Repository<VideoTask>>().GetById(task);
|
||||||
|
if(taskInfo is null)
|
||||||
|
throw new Exception("task 未找到");
|
||||||
var filePath = Path.Combine(task.LocalPath(), "task.wav");
|
var filePath = Path.Combine(task.LocalPath(), "task.wav");
|
||||||
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
|
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
|
||||||
throw new Exception("task 音频路径未找到");
|
throw new Exception("task 音频路径未找到");
|
||||||
if (OR is null) Init();
|
if (OR is null) Init(taskInfo.Subject);
|
||||||
serviceProvider.GetRequiredService<SherpaVad>()
|
serviceProvider.GetRequiredService<SherpaVad>()
|
||||||
.TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.ten_vad_324);
|
.TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324);
|
||||||
|
|
||||||
return Task.CompletedTask;
|
return Task.CompletedTask;
|
||||||
}
|
}
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
|
||||||
|
|
@ -90,7 +90,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||||
VADModelConfig.SileroVad = new SileroVadModelConfig();
|
VADModelConfig.SileroVad = new SileroVadModelConfig();
|
||||||
VADModelConfig.SileroVad.Model = path;
|
VADModelConfig.SileroVad.Model = path;
|
||||||
//(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。
|
//(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。
|
||||||
VADModelConfig.SileroVad.Threshold = 0.3f;
|
VADModelConfig.SileroVad.Threshold = 0.25f;
|
||||||
//(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?”
|
//(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?”
|
||||||
VADModelConfig.SileroVad.MinSilenceDuration = 0.2f;
|
VADModelConfig.SileroVad.MinSilenceDuration = 0.2f;
|
||||||
// (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?”
|
// (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?”
|
||||||
|
|
|
||||||
|
|
@ -110,12 +110,14 @@ namespace VideoAnalysisCore.Common
|
||||||
|
|
||||||
public FFMPGEHandle FFMPGE { get; set; }
|
public FFMPGEHandle FFMPGE { get; set; }
|
||||||
public SenseVoice senseVoice { get; set; }
|
public SenseVoice senseVoice { get; set; }
|
||||||
|
public FunASRNano funASRNano { get; set; }
|
||||||
public RedisManager redisManager { get; set; }
|
public RedisManager redisManager { get; set; }
|
||||||
|
|
||||||
public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager)
|
public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager, FunASRNano funASRNano)
|
||||||
{
|
{
|
||||||
FFMPGE = fFMPGE;
|
FFMPGE = fFMPGE;
|
||||||
this.senseVoice = senseVoice;
|
this.senseVoice = senseVoice;
|
||||||
|
this.funASRNano = funASRNano;
|
||||||
this.redisManager = redisManager;
|
this.redisManager = redisManager;
|
||||||
Init();
|
Init();
|
||||||
redisManager.InitChannel();
|
redisManager.InitChannel();
|
||||||
|
|
@ -137,7 +139,8 @@ namespace VideoAnalysisCore.Common
|
||||||
await scope.ServiceProvider.GetService<DownloadFile>()?.RunTask(task);
|
await scope.ServiceProvider.GetService<DownloadFile>()?.RunTask(task);
|
||||||
});
|
});
|
||||||
SubscribeList.Add(RedisChannelEnum.分离音频, FFMPGE.RunAsync);
|
SubscribeList.Add(RedisChannelEnum.分离音频, FFMPGE.RunAsync);
|
||||||
SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask);
|
//SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask);
|
||||||
|
SubscribeList.Add(RedisChannelEnum.解析字幕, funASRNano.RunTask);
|
||||||
//SubscribeList.Add(RedisChannelEnum.解析说话人,Speaker.Run);
|
//SubscribeList.Add(RedisChannelEnum.解析说话人,Speaker.Run);
|
||||||
SubscribeList.Add(RedisChannelEnum.AI课程类型, async (task) =>
|
SubscribeList.Add(RedisChannelEnum.AI课程类型, async (task) =>
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -128,6 +128,14 @@ namespace VideoAnalysisCore.Controllers.Dto
|
||||||
/// 用户中心的云校id
|
/// 用户中心的云校id
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public long? UserCenterCloudSchoolId { get; set; }
|
public long? UserCenterCloudSchoolId { get; set; }
|
||||||
|
/// <summary>
|
||||||
|
/// 教材层次
|
||||||
|
/// </summary>
|
||||||
|
public CourselevelTypeEnum? CourseLevel { get; set; }
|
||||||
|
/// <summary>
|
||||||
|
/// 年级
|
||||||
|
/// </summary>
|
||||||
|
public GradeEnum? GradeId { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 教育阶段
|
/// 教育阶段
|
||||||
|
|
|
||||||
|
|
@ -153,8 +153,8 @@ namespace VideoAnalysisCore.Controllers
|
||||||
public IActionResult AudioRecognition(IFormFile file)
|
public IActionResult AudioRecognition(IFormFile file)
|
||||||
{
|
{
|
||||||
using var s = file.OpenReadStream();
|
using var s = file.OpenReadStream();
|
||||||
senseVoice.RunTask(s);
|
var res = senseVoice.RunTask(s);
|
||||||
return Ok();
|
return Ok(res);
|
||||||
}
|
}
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// 语音识别
|
/// 语音识别
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace VideoAnalysisCore.Model.Enum
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// 课程层次
|
||||||
|
/// </summary>
|
||||||
|
public enum CourselevelTypeEnum
|
||||||
|
{
|
||||||
|
一层次 = 1,
|
||||||
|
二层次 = 2,
|
||||||
|
三层次 = 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -71,7 +71,7 @@
|
||||||
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
|
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.21" />
|
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.22" />
|
||||||
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
|
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
|
||||||
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
|
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
|
||||||
<PackageReference Include="SqlSugarCore" Version="5.1.4.205" />
|
<PackageReference Include="SqlSugarCore" Version="5.1.4.205" />
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue