调试 FunASR的STT,修复流程上的bug
This commit is contained in:
parent
eed63794b8
commit
d948f854fb
|
|
@ -18,7 +18,9 @@ namespace Learn.VideoAnalysis.Expand
|
|||
|
||||
Console.WriteLine($"{DateTime.Now}=>初始化 Coravel");
|
||||
service.AddScheduler();
|
||||
#if !DEBUG
|
||||
service.AddTransient<TaskFileClearJob>();
|
||||
#endif
|
||||
service.AddTransient<NodePackageJob>();
|
||||
}
|
||||
public static void UseCoravelExpand(this IApplicationBuilder provider)
|
||||
|
|
|
|||
|
|
@ -100,7 +100,8 @@ namespace Learn.VideoAnalysis
|
|||
AppCommon.Services = app.Services;
|
||||
app.UseMiddleware<BasicAuthMiddleware>("Swagger");
|
||||
// Configure the HTTP request pipeline.
|
||||
_ = app.Services.GetRequiredService<RedisInit>();
|
||||
//开启redis队列服务
|
||||
//_ = app.Services.GetRequiredService<RedisInit>();
|
||||
app.UseSwagger();
|
||||
app.UseSwaggerUI();
|
||||
app.UseExceptionHandler("/Error");
|
||||
|
|
|
|||
|
|
@ -156,8 +156,8 @@ function timeupdateVideo() {
|
|||
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end
|
||||
);
|
||||
// 更新字幕 AI优化字幕
|
||||
let subtitleI1 = subtitles1.value.findIndex(
|
||||
(subtitle) => currentTime >= subtitle.start && currentTime <= subtitle.end
|
||||
let subtitleI1 = subtitles1.value.findLastIndex(
|
||||
(subtitle) => currentTime >= subtitle.start
|
||||
);
|
||||
if (subtitleI > -1 && currentSubtitle.value !== subtitles.value[subtitleI].text) {
|
||||
currentSubtitle.value = subtitles.value[subtitleI].text;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
|
@ -34,9 +34,10 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
|||
}
|
||||
public class VideoKnowPointDto
|
||||
{
|
||||
public float KnowPointWeight { get; set; }
|
||||
public string KnowPoint { get; set; }
|
||||
public string KnowPointId { get; set; }
|
||||
public float KnowSourceTime { get; set; }
|
||||
public float KnowPointWeight { get; set; }
|
||||
public string KnowSource { get; set; }
|
||||
}
|
||||
public class VideoKnowRes
|
||||
|
|
@ -65,10 +66,6 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
|||
/// </summary>
|
||||
public virtual string? KnowPoint { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点权重
|
||||
/// </summary>
|
||||
public virtual float? KnowPointWeight { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点ID
|
||||
/// </summary>
|
||||
public virtual string? KnowPointId { get; set; }
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
var postMessages =
|
||||
$$"""
|
||||
# Role
|
||||
你是一位{{subject}}学科的教育专家与资深校对。你的任务是将{{sections}}内容的原始语音识别(STT)JSON 数据清洗为高质量教学文本。
|
||||
你是一位{{subject}}学科的教育专家,有着资深字幕校对经验。你的任务是将{{sections}}内容的原始语音识别(STT)JSON 数据清洗为高质量教学文本。
|
||||
# Input & Output Protocol
|
||||
输入和输出均为严格的 JSON 数组格式:`[{"t": number, "r": string}]`。
|
||||
`t` (Time): 绝对锚点,代表时间戳。严禁修改、严禁排序、严禁删除。
|
||||
|
|
@ -434,7 +434,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 作业内容检查
|
||||
/// </summary>
|
||||
/// <param name="taskInfo"></param>
|
||||
/// <param name="captions"></param>
|
||||
/// <param name="sections"></param>
|
||||
/// <returns></returns>
|
||||
private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections)
|
||||
{
|
||||
if (captions is null || string.IsNullOrWhiteSpace(captions.Captions))
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ using System.Text;
|
|||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using UserCenter.Model.Enum;
|
||||
using VideoAnalysisCore.Common;
|
||||
using VideoAnalysisCore.Model;
|
||||
using VideoAnalysisCore.Model.Enum;
|
||||
|
|
@ -49,7 +50,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
/// </summary>
|
||||
/// <param name="numThreads">默认6线程</param>
|
||||
/// <param name="useGPU">是否使用gpu 报错请看安装CUDA环境 <see href="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/></param>
|
||||
public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
|
||||
public void Init(SubjectEnum? subject = null, int numThreads = 10, bool useGPU = false, bool useHotwords = false)
|
||||
{
|
||||
Console.WriteLine("初始化 FunASRNano");
|
||||
OfflineRecognizerConfig config = new OfflineRecognizerConfig();
|
||||
|
|
@ -63,17 +64,23 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
//将非结构化数据(文本、图像、音频等)转换为低维稠密向量
|
||||
config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
|
||||
//接入的大语言模型
|
||||
config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
|
||||
//config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder ,"llm.fp16.onnx");
|
||||
config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "int8-2025-12-30", "llm.int8.onnx");
|
||||
//插入预训练模型(如Transformer)的小型可训练模块 (如语音识别、情感分析)
|
||||
config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
|
||||
//分词器
|
||||
config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
|
||||
//提示词
|
||||
config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
|
||||
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!";
|
||||
//加上学科为空的处理
|
||||
if (subject != null)
|
||||
config.ModelConfig.FunAsrNano.UserPrompt = $"这是一堂中国{subject}的课堂视频音频,请你帮我分析出它讲述的内容!";
|
||||
else
|
||||
config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国课堂的视频音频,请你帮我分析出它讲述的内容!";
|
||||
config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
|
||||
config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
|
||||
config.ModelConfig.FunAsrNano.TopP = 0.8f;
|
||||
config.ModelConfig.FunAsrNano.TopP = 0.7f;
|
||||
//种子
|
||||
config.ModelConfig.FunAsrNano.Seed = 42;
|
||||
|
||||
//模型类型
|
||||
|
|
@ -81,10 +88,10 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
config.ModelConfig.NumThreads = numThreads;
|
||||
config.ModelConfig.Provider = "cpu";
|
||||
//需要使用GPU
|
||||
if (!useGPU)
|
||||
if (useGPU)
|
||||
config.ModelConfig.Provider = "cuda";
|
||||
#if DEBUG
|
||||
config.ModelConfig.Debug = 1;
|
||||
//config.ModelConfig.Debug = 1;
|
||||
#endif
|
||||
OR = new OfflineRecognizer(config);
|
||||
}
|
||||
|
|
@ -108,13 +115,15 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
/// <returns></returns>
|
||||
public Task RunTask(string task)
|
||||
{
|
||||
var taskInfo = serviceProvider.GetRequiredService<Repository<VideoTask>>().GetById(task);
|
||||
if(taskInfo is null)
|
||||
throw new Exception("task 未找到");
|
||||
var filePath = Path.Combine(task.LocalPath(), "task.wav");
|
||||
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
|
||||
throw new Exception("task 音频路径未找到");
|
||||
if (OR is null) Init();
|
||||
if (OR is null) Init(taskInfo.Subject);
|
||||
serviceProvider.GetRequiredService<SherpaVad>()
|
||||
.TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.ten_vad_324);
|
||||
|
||||
.TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.ten_vad_324);
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
/// <summary>
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
VADModelConfig.SileroVad = new SileroVadModelConfig();
|
||||
VADModelConfig.SileroVad.Model = path;
|
||||
//(阈值 / 灵敏度) 含义:判定为“语音”的置信度。取值范围通常在 0 到 1 之间。
|
||||
VADModelConfig.SileroVad.Threshold = 0.3f;
|
||||
VADModelConfig.SileroVad.Threshold = 0.25f;
|
||||
//(最小静音长度)秒。 含义:“要沉默多久,我才认为这句话说完了?”
|
||||
VADModelConfig.SileroVad.MinSilenceDuration = 0.2f;
|
||||
// (最小语音长度)秒 含义:“这段声音至少要多长,我才认为它是有效的说话?”
|
||||
|
|
|
|||
|
|
@ -110,12 +110,14 @@ namespace VideoAnalysisCore.Common
|
|||
|
||||
public FFMPGEHandle FFMPGE { get; set; }
|
||||
public SenseVoice senseVoice { get; set; }
|
||||
public FunASRNano funASRNano { get; set; }
|
||||
public RedisManager redisManager { get; set; }
|
||||
|
||||
public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager)
|
||||
public RedisInit(FFMPGEHandle fFMPGE, SenseVoice senseVoice, RedisManager redisManager, FunASRNano funASRNano)
|
||||
{
|
||||
FFMPGE = fFMPGE;
|
||||
this.senseVoice = senseVoice;
|
||||
this.funASRNano = funASRNano;
|
||||
this.redisManager = redisManager;
|
||||
Init();
|
||||
redisManager.InitChannel();
|
||||
|
|
@ -137,7 +139,8 @@ namespace VideoAnalysisCore.Common
|
|||
await scope.ServiceProvider.GetService<DownloadFile>()?.RunTask(task);
|
||||
});
|
||||
SubscribeList.Add(RedisChannelEnum.分离音频, FFMPGE.RunAsync);
|
||||
SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask);
|
||||
//SubscribeList.Add(RedisChannelEnum.解析字幕, senseVoice.RunTask);
|
||||
SubscribeList.Add(RedisChannelEnum.解析字幕, funASRNano.RunTask);
|
||||
//SubscribeList.Add(RedisChannelEnum.解析说话人,Speaker.Run);
|
||||
SubscribeList.Add(RedisChannelEnum.AI课程类型, async (task) =>
|
||||
{
|
||||
|
|
|
|||
|
|
@ -128,6 +128,14 @@ namespace VideoAnalysisCore.Controllers.Dto
|
|||
/// 用户中心的云校id
|
||||
/// </summary>
|
||||
public long? UserCenterCloudSchoolId { get; set; }
|
||||
/// <summary>
|
||||
/// 教材层次
|
||||
/// </summary>
|
||||
public CourselevelTypeEnum? CourseLevel { get; set; }
|
||||
/// <summary>
|
||||
/// 年级
|
||||
/// </summary>
|
||||
public GradeEnum? GradeId { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// 教育阶段
|
||||
|
|
|
|||
|
|
@ -153,8 +153,8 @@ namespace VideoAnalysisCore.Controllers
|
|||
public IActionResult AudioRecognition(IFormFile file)
|
||||
{
|
||||
using var s = file.OpenReadStream();
|
||||
senseVoice.RunTask(s);
|
||||
return Ok();
|
||||
var res = senseVoice.RunTask(s);
|
||||
return Ok(res);
|
||||
}
|
||||
/// <summary>
|
||||
/// 语音识别
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace VideoAnalysisCore.Model.Enum
|
||||
{
|
||||
/// <summary>
|
||||
/// 课程层次
|
||||
/// </summary>
|
||||
public enum CourselevelTypeEnum
|
||||
{
|
||||
一层次 = 1,
|
||||
二层次 = 2,
|
||||
三层次 = 3
|
||||
}
|
||||
}
|
||||
|
|
@ -71,7 +71,7 @@
|
|||
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.21" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.22" />
|
||||
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
|
||||
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
|
||||
<PackageReference Include="SqlSugarCore" Version="5.1.4.205" />
|
||||
|
|
|
|||
Loading…
Reference in New Issue