Compare commits
No commits in common. "a2d14487cb9a0c6b8a3ea57a1581afdd6e83b8a3" and "de1cdcf32cd6a63145592018f61630d782169949" have entirely different histories.
a2d14487cb
...
de1cdcf32c
|
|
@ -10,11 +10,6 @@ namespace Learn.VideoAnalysis.Expand
|
|||
{
|
||||
public static class AuthorizeExpand
|
||||
{
|
||||
/// <summary>
|
||||
/// 框架API授权
|
||||
/// </summary>
|
||||
/// <param name="services"></param>
|
||||
/// <returns></returns>
|
||||
public static IServiceCollection AddPermissionAuthentication(this IServiceCollection services)
|
||||
{
|
||||
services.AddAuthentication()
|
||||
|
|
|
|||
|
|
@ -38,28 +38,25 @@ namespace Learn.VideoAnalysis
|
|||
loggingBuilder.SetMinimumLevel(LogLevel.Warning); // 设置最小日志级别为 Warning
|
||||
});
|
||||
|
||||
//绑定 appsetting 配置
|
||||
builder.Configuration.AddAppConfig(args);
|
||||
//初始化 插件
|
||||
builder.Services.AddEndpointsApiExplorer();
|
||||
builder.Services.AddSwaggerExpand("AI视频分析");
|
||||
//绑定 appsetting 配置
|
||||
builder.Configuration.AddAppConfig(args);
|
||||
builder.Services.AddPermissionAuthentication();
|
||||
builder.Services.AddSqlSugarExpand();
|
||||
builder.Services.AddRedisExpand();
|
||||
//工作流
|
||||
|
||||
builder.Services.AddSimpleTexOcrClient();
|
||||
builder.Services.AddDownloadFileExpand();
|
||||
builder.Services.AddFFMPGEExpand();
|
||||
builder.Services.AddAlibabaCloudVod();
|
||||
builder.Services.AddAliyunOSS();
|
||||
//语音转写
|
||||
builder.Services.AddSenseVoiceExpand();
|
||||
builder.Services.AddFunASRNanoExpand();
|
||||
builder.Services.AddSherpaVadExpand();
|
||||
//builder.Services.AddSpeakerAI();
|
||||
//定时任务
|
||||
builder.Services.AddCoravel();
|
||||
|
||||
//SenseVoice.Init();
|
||||
//异常过滤器
|
||||
builder.Services.AddControllersWithViews(options =>
|
||||
{
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ namespace VideoAnalysisCore.AICore.FFMPGE
|
|||
public static class FFMPGEExpand
|
||||
{
|
||||
/// <summary>
|
||||
/// 添加FFPMPEG拓展
|
||||
/// 添加跨域拓展
|
||||
/// </summary>
|
||||
/// <param name="services"></param>
|
||||
public static void AddFFMPGEExpand(this IServiceCollection services)
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
|
|||
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
|
||||
/// <returns></returns>
|
||||
/// <exception cref="Exception"></exception>
|
||||
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 32000)
|
||||
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 16000)
|
||||
{
|
||||
Message[] messageArr = [
|
||||
new Message(postMessages,"user"),
|
||||
|
|
@ -53,7 +53,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
|
|||
taskId = task,
|
||||
title = title,
|
||||
model = model ?? ChatGPTType.Deepseek_Reasoner,
|
||||
max_tokens = model == ChatGPTType.Deepseek_Reasoner ? 32000 : max_tokens,
|
||||
max_tokens = model == ChatGPTType.Deepseek_Reasoner ? 16000 : max_tokens,
|
||||
stream = true,
|
||||
temperature = 0.2f,
|
||||
messages = messageArr
|
||||
|
|
|
|||
|
|
@ -32,13 +32,6 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
|||
public virtual string? Content { get; set; }
|
||||
|
||||
}
|
||||
/// <summary>
/// One knowledge point matched to a video segment, as returned by the model in the
/// <c>KnowPoints</c> array of a segment.
/// </summary>
/// <remarks>
/// String members default to <see cref="string.Empty"/> so that a JSON element that omits a
/// field never leaves a <c>null</c> behind; consumers use <see cref="KnowPoint"/> as a
/// dictionary key, and a <c>null</c> key would throw.
/// </remarks>
public class VideoKnowPointDto
{
    /// <summary>
    /// Share of the segment attributed to this knowledge point
    /// (the prompt asks for values up to 1 that sum to 1 within a segment).
    /// </summary>
    public float KnowPointWeight { get; set; }

    /// <summary>Knowledge point name.</summary>
    public string KnowPoint { get; set; } = string.Empty;

    /// <summary>Knowledge point identifier.</summary>
    public string KnowPointId { get; set; } = string.Empty;

    /// <summary>
    /// Where the match came from: per the prompt, the start second of the first mention
    /// plus a short reason for the match.
    /// </summary>
    public string KnowSource { get; set; } = string.Empty;
}
|
||||
public class VideoKnowRes
|
||||
{
|
||||
/// <summary>
|
||||
|
|
@ -57,18 +50,10 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
|||
public virtual long? StageId { get; set; }
|
||||
public virtual VideoQuestionShowDto[]? QuestionArr { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点列表
|
||||
/// </summary>
|
||||
public virtual VideoKnowPointDto[]? KnowPoints { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点
|
||||
/// </summary>
|
||||
public virtual string? KnowPoint { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点权重
|
||||
/// </summary>
|
||||
public virtual float? KnowPointWeight { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点ID
|
||||
/// </summary>
|
||||
public virtual string? KnowPointId { get; set; }
|
||||
|
|
@ -80,11 +65,6 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
|
|||
/// 内容总结
|
||||
/// </summary>
|
||||
public virtual string? Content { get; set; }
|
||||
/// <summary>
|
||||
/// 教材来源
|
||||
/// <para> 课本/试卷/挹青苑 ...</para>
|
||||
/// </summary>
|
||||
public virtual string? TextbookSource { get; set; }
|
||||
|
||||
}
|
||||
public class FileNameInfo
|
||||
|
|
|
|||
|
|
@ -13,8 +13,6 @@ using System.IO;
|
|||
using VideoAnalysisCore.AICore.GPT.ChatGPT;
|
||||
using System.Threading.Tasks;
|
||||
using System.Text.Json;
|
||||
using FFmpeg.NET.Services;
|
||||
using NetTaste;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.GPT
|
||||
{
|
||||
|
|
@ -89,7 +87,7 @@ namespace VideoAnalysisCore.AICore.GPT
|
|||
var splitCount = "data:".Length;
|
||||
var maxLoop = 50 * 200;
|
||||
int threshold = 0;
|
||||
var startTime = DateTime.Now;
|
||||
var startTime= DateTime.Now;
|
||||
var endTime = startTime.AddHours(1.5);
|
||||
//最长分析分析时间1.5小时 或者重试读取 1w次
|
||||
while (maxLoop > 0 && DateTime.Now < endTime)
|
||||
|
|
@ -158,7 +156,7 @@ namespace VideoAnalysisCore.AICore.GPT
|
|||
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
|
||||
/// <returns></returns>
|
||||
/// <exception cref="Exception"></exception>
|
||||
public async Task<T> ChatAsync<T>(ChatRequest chatRep) where T:class,new()
|
||||
public async Task<T> ChatAsync<T>(ChatRequest chatRep)
|
||||
{
|
||||
var tryCount = 10;
|
||||
while (tryCount-- > 0)
|
||||
|
|
@ -188,14 +186,8 @@ namespace VideoAnalysisCore.AICore.GPT
|
|||
|
||||
if (string.IsNullOrEmpty(chatResContent))
|
||||
throw new Exception($"GPT返回结果无有效JSON =>{chatResp?.res}");
|
||||
var startsStr = "{";
|
||||
var endStr = "}";
|
||||
var resT = new T();
|
||||
if (resT is Array || resT is System.Collections.IList || resT is System.Collections.IList)
|
||||
{
|
||||
startsStr = "[";
|
||||
endStr = "]";
|
||||
}
|
||||
var startsStr = typeof(T).IsArray ? "[" : "{";
|
||||
var endStr = typeof(T).IsArray ? "]" : "}";
|
||||
if (!chatResContent.StartsWith(startsStr))
|
||||
chatResContent = startsStr + chatResContent;
|
||||
if (!chatResContent.EndsWith(endStr))
|
||||
|
|
@ -279,7 +271,7 @@ namespace VideoAnalysisCore.AICore.GPT
|
|||
/// <returns></returns>
|
||||
/// <exception cref="Exception"></exception>
|
||||
public virtual Task<T> ChatAsync<T>(string task, string postMessages, string title,
|
||||
string model = null, int max_tokens = 16000) where T : class, new()
|
||||
string model = null, int max_tokens = 16000)
|
||||
{
|
||||
throw new Exception("需要实现");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ using System.Collections.Generic;
|
|||
using UserCenter.Model.Enum;
|
||||
using Dm.filter;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
||||
{
|
||||
|
|
@ -38,7 +37,6 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
private readonly RedisManager redisManager;
|
||||
private readonly Repository<VideoTask> videoTaskDB;
|
||||
private readonly Repository<VideoKonwPoint> videoKonwPointDB;
|
||||
private readonly Repository<VideoTaskStage> videoTaskStageDB;
|
||||
private readonly Repository<VideoQuestion> videoQuestionDB;
|
||||
private readonly Repository<VideoQuestionKonw> videoQuestionKonwDB;
|
||||
private readonly Repository<KnowledgeInfo> knowledgeInfoDB;
|
||||
|
|
@ -51,7 +49,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
/// <param name="logger"></param>
|
||||
public GTP_Analysis_1(DeepSeekGPTClient moonshotClient, Repository<CourseGradingCriteria> criteria, Repository<VideoTask> videoTaskDB,
|
||||
Repository<KnowledgeInfo> knowledgeInfoDB, Repository<VideoKonwPoint> videoKonwPointDB, SimpLetexClient simpLetexClient,
|
||||
Repository<VideoQuestion> videoQuestionDB, OssClient ossClient, Repository<VideoQuestionKonw> videoQuestionKonwDB, RedisManager redisManager, BestAIClient chatGPTClient, GeminiGPTClient geminiClient, Repository<VideoTaskStage> videoTaskStageDB)
|
||||
Repository<VideoQuestion> videoQuestionDB, OssClient ossClient, Repository<VideoQuestionKonw> videoQuestionKonwDB, RedisManager redisManager, BestAIClient chatGPTClient, GeminiGPTClient geminiClient)
|
||||
{
|
||||
deepSeekClient = moonshotClient;
|
||||
criteriaDB = criteria;
|
||||
|
|
@ -65,13 +63,12 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
this.redisManager = redisManager;
|
||||
this.chatGPTClient = chatGPTClient;
|
||||
this.geminiClient = geminiClient;
|
||||
this.videoTaskStageDB = videoTaskStageDB;
|
||||
}
|
||||
/// <summary>
|
||||
/// 获取分段内容对应的章节知识点
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private async Task<List<VideoKonwPoint>> GetVideoKnow(List<VideoKnowRes> questionRes, VideoTask taskInfo,
|
||||
private async Task<List<VideoKonwPoint>> GetVideoKnow(VideoKnowRes[] questionRes, VideoTask taskInfo,
|
||||
string sections, List<KnowledgeInfo> knowledgeInfos)
|
||||
{
|
||||
var knows = string.Join(',', knowledgeInfos.Select(s => s.Id + "|" + s.Name));
|
||||
|
|
@ -80,44 +77,30 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
.GroupBy(s => s.Name)
|
||||
.ToDictionary(s => s.First().Name, s => s.First().Id);
|
||||
questionRes = questionRes.Where(s => s != null)
|
||||
.OrderBy(s => s.StartTime).ToList();
|
||||
.OrderBy(s => s.StartTime).ToArray();
|
||||
var thems = questionRes.Adapt<VideoKnowQueryDto[]>().ToJson();
|
||||
var checkResFormat1 = """[{"StartTime":12.3,"TextbookSource":"课本","KnowPoints":[{"KnowPointWeight":0.5,"KnowSource":"开始时间(秒),匹配的原因","KnowPoint":"知识点名称","KnowPointId":"123"}]}]""";
|
||||
var checkResFormat1 = """[{"StartTime":开始秒(number),"KnowPoint":知识点名称(string),"KnowPointId":知识点Id(string)}]""";
|
||||
var knowMessages =
|
||||
$"""
|
||||
角色:你是一位{taskInfo.Subject}学科教研老师。
|
||||
任务:为每个【视频分段】分配对应的知识点(可多个),并补充来源信息。
|
||||
字段说明:
|
||||
- TextbookSource:该分段讲授内容所属教材来源,仅允许取值:课本/试卷/挹青苑/其他
|
||||
- KnowPoints:数组。每个元素代表一个知识点匹配结果
|
||||
- KnowPoint:知识点名称(必须来自我提供的列表)
|
||||
- KnowPointId:知识点ID(必须与 KnowPoint 对应)
|
||||
- KnowPointWeight:知识点在本分段的占比权重(最高为1,单个分段内的知识点权重之和必须等于1)
|
||||
- KnowSource:该知识点在字幕中被首次提及的“开始秒,为什么匹配的原因(最多50字)”
|
||||
强制约束(硬性条件):
|
||||
1) 输出数组长度必须与输入分段长度一致,且顺序一致;不得新增/删除/合并分段。
|
||||
2) 每个对象的 StartTime 必须与输入分段的 StartTime 完全一致(不得改动)。
|
||||
3) KnowPoints 允许为空数组(表示未能匹配任何知识点),但不要输出空字符串或用逗号拼接。
|
||||
4) 只输出 JSON(禁止 Markdown/解释/额外文本)。
|
||||
输入分段:{thems}
|
||||
章节:{sections}
|
||||
知识点列表(Id|Name):{knows}
|
||||
输出格式示例:{checkResFormat1}
|
||||
""";
|
||||
$"我针对{taskInfo.Subject}课堂授课视频分析出了视频的授课阶段片段。\n" +
|
||||
$"现在需要你通过每个片段的内容总结来分配正确的知识点(单个片段允许多个知识点用逗号','分割)。\n" +
|
||||
$"这是我的分段 {thems}。\n" +
|
||||
$"课堂内容与{sections}章节相关\n" +
|
||||
$"最后请确保分配的知识点是用户提供的,并且一定正确合理!\n" +
|
||||
$"返回的片段数量与传入片段数量一致(硬性条件)!\n" +
|
||||
$"输出内容只返回json格式({checkResFormat1})\n" +
|
||||
$" 格式 (方法点Id|方法点名称) \n" +
|
||||
$"提供的`知识点名称({knows})。\n";
|
||||
await redisManager.AddTaskLog(taskInfo.Id, "==>2.开始分析视频内容知识点");
|
||||
List<VideoKnowRes> konwRes;
|
||||
VideoKnowRes[] konwRes;
|
||||
var knowOK = false;
|
||||
var chatClentArr = new GPTClient[] { chatGPTClient, geminiClient, deepSeekClient };
|
||||
var chatClentArr = new GPTClient[] { chatGPTClient, geminiClient,deepSeekClient };
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
konwRes = await chatClentArr[i].ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), knowMessages, "知识点");
|
||||
konwRes = await chatClentArr[i].ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), knowMessages, "知识点");
|
||||
// 分析结果的片段数量与预期不匹配
|
||||
if (questionRes.Count() != konwRes.Count()) continue;
|
||||
if (questionRes.Length != konwRes.Length) continue;
|
||||
for (int xi = 0; xi < konwRes.Count(); xi++)
|
||||
{
|
||||
questionRes[xi].KnowPoints = konwRes[xi].KnowPoints;
|
||||
questionRes[xi].TextbookSource = konwRes[xi].TextbookSource;
|
||||
}
|
||||
questionRes[xi].KnowPoint = konwRes[xi].KnowPoint;
|
||||
knowOK = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -128,12 +111,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
}
|
||||
|
||||
return questionRes
|
||||
.Where(s => s.KnowPoints != null && s.KnowPoints.Length > 0)
|
||||
.Where(s => !string.IsNullOrEmpty(s.KnowPoint))
|
||||
.SelectMany(
|
||||
s =>
|
||||
{
|
||||
var ks = s.KnowPoint.Split(",").Distinct();
|
||||
var StageId = Yitter.IdGenerator.YitIdHelper.NextId();
|
||||
return s.KnowPoints.Where(x => knowDic.ContainsKey(x.KnowPoint))
|
||||
return ks.Where(x => knowDic.ContainsKey(x))
|
||||
.Select(x => new VideoKonwPoint()
|
||||
{
|
||||
Content = s.Content,
|
||||
|
|
@ -141,11 +125,8 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
StartTime = s.StartTime,
|
||||
EndTime = s.EndTime,
|
||||
StageId = StageId,
|
||||
KnowPoint = x.KnowPoint,
|
||||
KnowPointWeight=x.KnowPointWeight,
|
||||
TextbookSource = s.TextbookSource,
|
||||
KnowSource = x.KnowSource,
|
||||
KnowPointId = knowDic[x.KnowPoint].ToString(),
|
||||
KnowPoint = x,
|
||||
KnowPointId = knowDic[x].ToString(),
|
||||
TagId = taskInfo.TagId,
|
||||
VideoTaskId = taskInfo.Id,
|
||||
CloudSchoolId = taskInfo.CloudSchoolId,
|
||||
|
|
@ -202,28 +183,29 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
/// 检查AI切片结果质量
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private async Task<CheckMessageDto> VerifySpanQuality(List<VideoKnowRes> questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, long course_Id)
|
||||
private async Task<CheckMessageDto> VerifySpanQuality(VideoKnowRes[] questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, long course_Id)
|
||||
{
|
||||
//校验结果质量
|
||||
var thems = questionRes.Adapt<VideoKnowQueryDto[]>().ToJson();
|
||||
var pptFormat = taskInfo.VideoType == AttachmentsInfoType.复习
|
||||
? "这堂课是习题课,所讲解内容几乎都是试题。"
|
||||
: string.Empty;
|
||||
var checkResFormat = """{"Score":65.5,"MinusScore":"简洁的扣分原因","Suggestion":"简洁的改进建议"}""";
|
||||
var checkResFormat = """{"Score":打分(number),"MinusScore":简洁的扣分原因(string)",Suggestion":改进建议(string)""";//,"Data":优化后的分段(array)}""";
|
||||
var checkMessage =
|
||||
$"""
|
||||
请你担任一位专业的视频内容分析教研老师,擅长评估视频内容的结构和逻辑流暢度。
|
||||
核心任务: 请根据我提供的【视频分段方案】【完整字幕文本】,对该分段方案进行严谨评估。
|
||||
补充上下文:{pptFormat}
|
||||
本节课所属章节:{sections}
|
||||
评估维度与具体标准:
|
||||
时间间隔检查(硬性指标)
|
||||
严格检查每个分段与下一个分段的开始时间之间的间隔是否大于40秒。
|
||||
内容结构与主题合理性:
|
||||
分段准确性:评估单个分段内的课堂字幕内容与分段的Theme/Conten匹配、是否存在错误,捏造的情况(硬性指标)。
|
||||
知识点分配:检查分段内的知识点是否与分段Conten有关联,知识点分配给这个分段是否合理(硬性指标)。
|
||||
分段方案的准确度:评估单个分段内的Content是否与对应时间段内的字幕文本内容匹配(硬性指标,不能存在捏造的内容)
|
||||
主题凝聚力:评估单个分段内的内容是否围绕一个清晰、统一的主题展开,是否存在主题混杂或跳跃的情况。
|
||||
逻辑过渡:评估分段之间的过渡是否自然流畅,后一段是否是前一段内容的合理延伸或转折。
|
||||
知识点分配:检查分段内的知识点是否与分段内容有关联,知识点分配给这个分段是否合理(这项很重要)。
|
||||
综合评分:
|
||||
请基于以上分析,提供一个0-100的综合得分(70分及格,打分一定要严谨,总分一定要准确)。
|
||||
MinusScore: 详细说明打分理由,并逐条对应到上述评估维度。
|
||||
Suggestion: 基于扣分原因提出针对分段方案的改进意见(请忽略掉分段没有结束时间的问题我会自己处理)。
|
||||
请基于以上分析,提供一个0-100的综合得分(70分及格)。
|
||||
详细说明打分理由,并逐条对应到上述评估维度。
|
||||
输入数据格式说明:
|
||||
分段方案: {thems}
|
||||
字幕文本: 格式为说话人:开始秒:结束秒:内容|下一段字幕。完整内容为:{captions.Captions}
|
||||
|
|
@ -232,50 +214,6 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
""";
|
||||
return await chatGPTClient.ChatAsync<CheckMessageDto>(taskInfo.Id.ToString(), checkMessage, "结果检查");
|
||||
}
|
||||
/// <summary>
/// Asks the Gemini client to repair a segmentation plan according to reviewer feedback.
/// </summary>
/// <param name="questionRes">Current segmentation plan; serialized to JSON for the prompt.</param>
/// <param name="taskInfo">Video task; supplies the task id and the video type.</param>
/// <param name="captions">Caption bundle whose <c>Captions</c> text is embedded in the prompt.</param>
/// <param name="sections">Chapter description embedded in the prompt.</param>
/// <param name="suggestion">Reviewer feedback; when blank no request is made.</param>
/// <returns>
/// The repaired plan ordered by start time, or <c>null</c> when <paramref name="suggestion"/> is
/// blank, the client returns nothing, or the segment count differs from the input.
/// </returns>
private async Task<List<VideoKnowRes>?> ImproveSpanBySuggestion(List<VideoKnowRes> questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, string suggestion)
{
    // Nothing to apply: skip the model round-trip entirely.
    if (string.IsNullOrWhiteSpace(suggestion))
    {
        return null;
    }

    var planJson = questionRes.ToJson();

    // Extra hint that is only added for review (exercise) lessons.
    var lessonHint = string.Empty;
    if (taskInfo.VideoType == AttachmentsInfoType.复习)
    {
        lessonHint = "这堂课是习题课,所讲解内容几乎都是试题。";
    }

    var outputShape = """[{"StartTime":0.0,"EndTime":12.3,"Theme":"主题","Content":"内容总结","KnowPoint":"知识点(可为空)"}]""";
    var prompt =
        $"""
        请你担任一位专业的视频内容分析教研老师,擅长根据评估意见修复视频分段方案。
        目标:在不改变片段数量的前提下,基于【改进意见】对【分段方案】做最小必要修改,使其更符合课堂内容的自然结构。
        补充上下文:{lessonHint}
        本节课所属章节:{sections}
        强制约束:
        1) 输出数组长度必须与输入分段方案一致(硬性条件)。
        2) StartTime 必须严格递增。
        3) Content 必须完全基于字幕文本可推断的信息,禁止捏造不存在的内容(硬性条件)。
        4) 忽略对于每段的结束时间的优化处理。
        5) 不要新增字段,不要输出解释性文字,只输出 JSON。
        输入数据:
        分段方案:{planJson}
        改进意见:{suggestion}
        字幕文本:格式为说话人:开始秒:结束秒:内容|下一段字幕。完整内容为:{captions.Captions}
        输出格式(仅 JSON):{outputShape}
        """;

    var repaired = await geminiClient.ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), prompt, "分段优化");

    // Accept the answer only when it kept the segment count it was told to keep.
    var usable = repaired is not null && repaired.Count == questionRes.Count;
    if (!usable)
    {
        return null;
    }

    // Segments without a start time sort as if they started at 0.
    var sorted = repaired.OrderBy(segment => segment.StartTime ?? 0);
    return sorted.ToList();
}
|
||||
|
||||
/// <summary>
|
||||
/// 优化字幕
|
||||
|
|
@ -284,7 +222,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
private async Task<SenseVoiceRes[]> OptimizeSubtitles(VideoTask taskInfo,
|
||||
SenseVoiceRes[] captionsArr, string sections)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(taskInfo.CaptionsAI) && taskInfo.CaptionsAI!="[]")
|
||||
if (!string.IsNullOrEmpty(taskInfo.CaptionsAI))
|
||||
return JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.CaptionsAI);
|
||||
var subject = taskInfo.Subject.ToString();
|
||||
var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length);
|
||||
|
|
@ -292,7 +230,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
var totalCount = captionsArr.Length / spanCount + 1;
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化");
|
||||
|
||||
var chatClentArr = new GPTClient[] { deepSeekClient, chatGPTClient, geminiClient };
|
||||
var chatClentArr = new GPTClient[] { deepSeekClient,chatGPTClient, geminiClient };
|
||||
await Parallel.ForAsync(0, totalCount,
|
||||
new ParallelOptions() { MaxDegreeOfParallelism = 1 },
|
||||
async (s, c) =>
|
||||
|
|
@ -319,10 +257,10 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
$"待优化字幕内容:\n" +
|
||||
$"{nowCaptionStr}\n" +
|
||||
$"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。";
|
||||
List<string>? resData = null;
|
||||
string[]? resData = null;
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
resData = await chatClentArr[i].ChatAsync<List<string>>(taskInfo.Id.ToString(), postMessages, "优化字幕", ChatGPTType.Deepseek_Chat, 8000);
|
||||
resData = await chatClentArr[i].ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕", ChatGPTType.Deepseek_Chat, 8000);
|
||||
if (resData.Count() == cArr.Count())
|
||||
break;
|
||||
else
|
||||
|
|
@ -331,7 +269,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
|
||||
if (resData.Count() != cArr.Count())
|
||||
{
|
||||
resData = cStrArr.ToList();
|
||||
resData = cStrArr.ToArray();
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化 分段{s} AI结果数量不匹配 采用原始值");
|
||||
}
|
||||
newCaptionsList.AddRange(resData.Select((text, i) => new SenseVoiceRes()
|
||||
|
|
@ -355,7 +293,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
/// 视频AI分析字幕
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private async Task<List<VideoKnowRes>> Analytics(VideoTask taskInfo,
|
||||
private async Task<VideoKnowRes[]> Analytics(VideoTask taskInfo,
|
||||
TotalCaptionsDto captions, string sections)
|
||||
{
|
||||
var tryCount = 10;
|
||||
|
|
@ -368,30 +306,32 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
? $"请分析授课中字幕描述的知识内容,然后基于视频整体知识点讲解提炼出不同的阶段以便对老师上课内容切片提取为知识库,所以请确保阶段的内容准确性"
|
||||
: $"授课中老师的PPT在这些时间段内进行了切换{taskInfo.PPTKeyFrame},理应这些时间段内的讲述内容也发生了变化,请你基于PPT变化时间点结合字幕描述的知识内容提炼出不同的切片。" +
|
||||
$"每个阶段的起始和结束应接近这些时间点(例如,以时间点为中心,扩展至内容自然过渡处)。";
|
||||
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":阶段主题(string),"Content":内容总结(string)}]""";
|
||||
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]""";
|
||||
var reviewStr = taskInfo?.VideoType == AttachmentsInfoType.复习
|
||||
? $"但本堂课是习题课,所以大部分阶段是不同的例题讲解内容。\n"
|
||||
: string.Empty;
|
||||
var postMessages = string.Empty;
|
||||
postMessages =
|
||||
$"请通过视频字幕内容分析出视频中课堂的授课知识点切片\n" +
|
||||
$"阶段的细分程度到某个知识点的讲解/认识/例题/总结\n" +
|
||||
$"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。\n" +
|
||||
$"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。\n" +
|
||||
reviewStr +
|
||||
$"讲解知识内容的阶段的细分程度到某个知识点的讲解/认识/例题/总结\n" +
|
||||
$"不分析课堂作业相关的内容我已经预处理了\n" +
|
||||
$"初步划分阶段:{keyFrameStr}\n" +
|
||||
$"Stage:判断阶段类型如果内容以解题为主,归类为“例题精讲”;如果涉及新知识讲解,归类为“新知讲解”;以此类推。\n" +
|
||||
$"Content:简述单个阶段的核心讲解内容40~150字(如“例题”“证明”“练习”“总结”...), 必须完全基于字幕文本可推断的信息,禁止捏造不存在的内容(硬性条件)。\n" +
|
||||
$"Theme:理解Content,提炼一个精确的主题(例如,“柯西不等式的基本应用”)。\n" +
|
||||
$"输出要求:确保阶段划分合理、无重叠、\n" +
|
||||
$"作业布置阶段一般出现在末尾如果有" +
|
||||
$"\n" +
|
||||
$"内容分析:对每个时间段,提取主要讲解内容:识别关键词(如“例题”“证明”“练习”“总结”)和内容结构。\n" +
|
||||
$"判断阶段类型:如果内容以解题为主,归类为“例题精讲”;如果涉及新知识讲解,归类为“新知讲解”;以此类推。\n" +
|
||||
$"内容总结:简述该阶段的核心讲解内容70~200字,确保内容与阶段时间内授课内容符合。\n" +
|
||||
$"阶段主题:基于内容总结,提炼一个恰当的主题(例如,“柯西不等式的基本应用”)。\n" +
|
||||
$"输出要求:确保阶段划分合理、无` 重叠,且时长符合要求,并且每个阶段的时长需要超过60秒如果时长不够去考虑合并到相邻的阶段\n" +
|
||||
$"输出格式要求:内容只返回json格式({resFormat})\n" +
|
||||
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。\n" +
|
||||
$"字幕列表 {captions.Captions} 字幕结束!";
|
||||
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"开始分析视频内容 {tryCount}");
|
||||
var res = await geminiClient.ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), postMessages, "分析字幕");
|
||||
//return await chatGPTClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
|
||||
var res = await geminiClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
|
||||
//var r2 = await chatClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
|
||||
return res;
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
|
@ -402,122 +342,6 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends the trailing captions of a lesson to the DeepSeek client and asks whether the teacher
/// assigned homework there.
/// </summary>
/// <param name="taskInfo">Video task; supplies the task id and the video type.</param>
/// <param name="captions">Caption bundle; its <c>Captions</c> text is split on <c>'|'</c>.</param>
/// <param name="sections">Chapter description embedded in the prompt.</param>
/// <returns>
/// The homework stage reported by the model, or <c>null</c> when there are no captions, the
/// client returns nothing, the reported stage is not "作业布置", or the content is blank.
/// </returns>
private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections)
{
    // Single source of truth for how many trailing caption entries are sent; the prompt quotes
    // this number, so it must not drift from the slice below.
    const int tailCaptionCount = 80;

    if (captions is null || string.IsNullOrWhiteSpace(captions.Captions))
    {
        return null;
    }

    var parts = captions.Captions
        .Split('|', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (parts.Length == 0)
    {
        return null;
    }

    // Only the end of the lesson is inspected.
    var tail = string.Join('|', parts.TakeLast(tailCaptionCount));
    var resFormat = """{"StartTime":123.4,"EndTime":456.7,"Stage":"作业布置|无作业","Theme":"课后作业布置","Content":"作业内容(可包含条目)"}""";

    // Extra hint that is only added for review (exercise) lessons.
    var pptFormat = taskInfo.VideoType == AttachmentsInfoType.复习
        ? "这堂课是习题课,作业可能是布置练习/订正/讲义整理。"
        : string.Empty;

    var message =
        $"""
        请你担任一位专业的课堂教研老师,擅长从课堂结尾字幕中识别老师布置的课后作业内容。
        目标:只分析我提供的【最后{tailCaptionCount}条字幕】,判断是否存在作业布置,并提取作业内容,输出为一个 VideoKnowRes 对象。
        补充上下文:{pptFormat}
        本节课所属章节:{sections}
        识别要点(供参考):作业/课后/回家/练习/习题/订正/预习/背诵/阅读/抄写/完成/下节课/交作业 等。
        强制约束:
        1) 必须完全基于字幕文本,不允许捏造。
        2) 如果没有明确作业布置:Stage 必须输出为“无作业”,Content 输出空字符串。
        3) 如果识别到作业布置:Stage 必须输出为“作业布置”,Theme 固定为“课后作业布置”。
        4) StartTime/EndTime 尽量取与作业布置相关字幕的起止秒(从字幕文本中推断)。
        5) 只输出 JSON,不要输出任何解释。
        输入字幕(最后{tailCaptionCount}条,格式为 说话人:开始秒:结束秒:内容|下一条字幕):
        {tail}
        输出格式(仅 JSON):{resFormat}
        """;

    var res = await deepSeekClient.ChatAsync<VideoKnowRes>(taskInfo.Id.ToString(), message, "作业布置识别", ChatGPTType.Deepseek_Chat, 8000);
    if (res is null)
    {
        return null;
    }

    // The prompt requires Stage == "作业布置" with non-empty Content for a positive answer;
    // anything else is treated as "no homework".
    if (!string.Equals(res.Stage, "作业布置", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    if (string.IsNullOrWhiteSpace(res.Content))
    {
        return null;
    }

    return res;
}
|
||||
|
||||
/// <summary>
/// Appends a homework stage to a list of segments, unless a segment already mentions homework.
/// </summary>
/// <param name="segments">Existing segments; <c>null</c> is treated as empty and null entries are dropped.</param>
/// <param name="homeworkStage">Detected homework stage; when <c>null</c> the input is returned untouched.</param>
/// <param name="maxVideoTime">Upper bound for the end time; only applied when greater than 0.</param>
/// <returns>
/// The segments ordered by start time, with the homework stage appended as the last element when
/// it was added. The end time of the previous last segment may be shortened in place so that it
/// does not run past the homework start.
/// </returns>
private VideoKnowRes[] MergeHomeworkStage(VideoKnowRes[] segments, VideoKnowRes homeworkStage, float maxVideoTime)
{
    if (homeworkStage is null)
    {
        return segments;
    }

    var timeline = (segments ?? Array.Empty<VideoKnowRes>())
        .Where(item => item != null)
        .OrderBy(item => item.StartTime ?? 0)
        .ToList();

    // A segment whose stage or theme already contains "作业" means homework is covered.
    static bool MentionsHomework(string? text) => !string.IsNullOrWhiteSpace(text) && text.Contains("作业");
    foreach (var item in timeline)
    {
        if (MentionsHomework(item.Stage) || MentionsHomework(item.Theme))
        {
            return timeline.ToArray();
        }
    }

    // Without a usable end time the stage cannot be placed.
    var end = homeworkStage.EndTime ?? maxVideoTime;
    if (end <= 0)
    {
        return timeline.ToArray();
    }

    // Missing start: assume the homework began up to 120 seconds before its end.
    var start = homeworkStage.StartTime ?? Math.Max(0, end - 120);

    // Degenerate span (shorter than one second): widen it to 30 seconds.
    if (end - start < 1)
    {
        start = Math.Max(0, end - 30);
        if (end - start < 1)
        {
            end = start + 30;
        }
    }

    if (maxVideoTime > 0 && end > maxVideoTime)
    {
        end = maxVideoTime;
    }

    if (timeline.Count != 0)
    {
        var previous = timeline[timeline.Count - 1];
        var previousStart = previous.StartTime ?? 0;
        var previousEnd = previous.EndTime ?? previousStart;

        // Close a gap of more than 40 seconds by starting right where the previous segment ends.
        if (start - previousEnd > 40)
        {
            start = previousEnd;
        }

        // Keep start times strictly increasing.
        if (start <= previousStart)
        {
            start = previousStart + 0.01f;
        }

        // The adjustments above may have pushed start past end; rebuild a 30 second span.
        if (start >= end)
        {
            end = start + 30;
            if (maxVideoTime > 0 && end > maxVideoTime)
            {
                end = maxVideoTime;
            }
        }

        // Trim (or fill in) the previous segment's end so it does not overlap the homework stage.
        if (previous.EndTime is null || previous.EndTime > start)
        {
            previous.EndTime = start;
        }
    }

    var theme = homeworkStage.Theme;
    if (string.IsNullOrWhiteSpace(theme))
    {
        theme = "课后作业布置";
    }

    var appended = new VideoKnowRes()
    {
        StartTime = start,
        EndTime = end,
        Stage = "作业布置",
        Theme = theme,
        Content = homeworkStage.Content
    };
    timeline.Add(appended);

    return timeline.ToArray();
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -587,7 +411,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
$"输出内容只返回json格式为({resFormat})" +
|
||||
$"以下是试题内容" +
|
||||
$"`{sRes.Result.res.value}`";
|
||||
var resData = await deepSeekClient.ChatAsync<List<VideoQuestionOSSDto>>(taskInfo.Id.ToString(), postMessages, "提取试题");
|
||||
var resData = await deepSeekClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
|
||||
//var resData = await chatClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
|
||||
if (resData is null || resData.Count() == 0)
|
||||
break;
|
||||
|
|
@ -700,74 +524,25 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
captionsArr = await OptimizeSubtitles(taskInfo, captionsArr, sections);
|
||||
//合并字幕
|
||||
var captions = ExpandFunction.GetSpeakerCaptions(captionsArr);
|
||||
var homework = await DetectHomeworkAssignment(taskInfo, captions, sections);
|
||||
if (homework != null)
|
||||
{
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"==>识别到作业布置 {homework.Content}");
|
||||
await redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Homework", homework);
|
||||
}
|
||||
var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0;
|
||||
List<VideoKnowRes>? questionRes = null;
|
||||
VideoKnowRes[]? questionRes = null;
|
||||
var tryCount = 20;
|
||||
while (tryCount-- > 0)
|
||||
{
|
||||
//视频字幕分析
|
||||
questionRes = await Analytics(taskInfo, captions, sections);
|
||||
|
||||
if (questionRes is null) continue;
|
||||
//处理分段 知识点
|
||||
List<VideoKonwPoint> insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);
|
||||
|
||||
var insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);
|
||||
//校验结果质量
|
||||
var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id);
|
||||
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"==>课堂内容AI分析结果 得分=>{checkRes.Score}");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"==>改进意见 {checkRes.Suggestion}");
|
||||
await redisManager.AddTaskLog(taskInfo.Id, $"==>扣分原因 {checkRes.MinusScore}");
|
||||
// 质量复检
|
||||
// Quality re-check: ask the model to repair the segmentation using the reviewer's feedback and
// accept the repaired plan only when it scores at least 90 and beats the first score.
if (checkRes != null)
{
    // Must be an interpolated string: a plain literal would send the placeholder text
    // "{checkRes.MinusScore}" to the model instead of the actual feedback.
    var feedback = $"扣分原因 {checkRes.MinusScore} \n 改进意见 {checkRes.Suggestion}";
    var improved = await ImproveSpanBySuggestion(questionRes, taskInfo, captions, sections, feedback);
    if (improved != null)
    {
        var improvedCheck = await VerifySpanQuality(improved, taskInfo, captions, sections, Course_Id);

        // The acceptance test below treats improvedCheck as possibly null, so the log calls
        // must not dereference it unguarded.
        await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后复检得分=>{improvedCheck?.Score}");
        await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后扣分原因 {improvedCheck?.MinusScore}");

        if (improvedCheck != null && improvedCheck.Score >= 90 && improvedCheck.Score > checkRes.Score)
        {
            questionRes = improved;

            // Re-attach the separately detected homework stage when the repaired plan has none.
            if (homework != null && (!questionRes.Any(s => s.Stage == StageEnum.作业布置.ToString())))
            {
                questionRes.Add(homework);
            }

            insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);

            // Replace whatever was stored for this task before.
            await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
            await videoTaskStageDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);

            // One stage row per StageId, taking the shared fields from the group's first point.
            var tStage = insertData.GroupBy(s => s.StageId).Select(s => new VideoTaskStage
            {
                Id = s.Key,
                TagId = s.First().TagId,
                CloudSchoolId = s.First().CloudSchoolId,
                StartTime = s.First().StartTime,
                EndTime = s.First().EndTime,
                Content = s.First().Content,
                TextbookSource = s.First().TextbookSource,
                Stage = s.First().Stage,
                Theme = s.First().Theme,
                VideoTaskId = taskInfo.Id,
            }).ToArray();
            await videoTaskStageDB.InsertRangeAsync(tStage);
            await videoKonwPointDB.InsertRangeAsync(insertData);
            break;
        }
        else
        {
            // Repaired plan is missing a score, scored below 90, or did not improve: retry.
            await redisManager.AddTaskLog(taskInfo.Id, $"==>优化之后的得分降低/得分过低");
            continue;
        }
    }
}
|
||||
|
||||
if (checkRes != null && checkRes.Score >= 90)
|
||||
if (checkRes != null && checkRes.Score >= 85)
|
||||
{
|
||||
//写入知识点
|
||||
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
|
|||
/// <returns></returns>
|
||||
/// <exception cref="Exception"></exception>
|
||||
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title,
|
||||
string model = null, int max_tokens = 32_000)
|
||||
string model = null, int max_tokens = 16000)
|
||||
{
|
||||
Message[] messageArr = [
|
||||
new Message(postMessages,"user"),
|
||||
|
|
@ -57,8 +57,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
|
|||
max_tokens = max_tokens,
|
||||
stream = true,
|
||||
temperature = 0.2f,
|
||||
messages = messageArr,
|
||||
max_completion_tokens= 12288,
|
||||
messages = messageArr
|
||||
};
|
||||
|
||||
chatReq.modalities = null;
|
||||
|
|
|
|||
|
|
@ -1,134 +0,0 @@
|
|||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SherpaOnnx;
|
||||
using SqlSugar.IOC;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using VideoAnalysisCore.Common;
|
||||
using VideoAnalysisCore.Model;
|
||||
using VideoAnalysisCore.Model.Enum;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||
{
|
||||
/// <summary>
/// Dependency-injection registration for <see cref="FunASRNano"/>.
/// </summary>
public static class FunASRNanoExpand
{
    /// <summary>
    /// Registers <see cref="FunASRNano"/> as a singleton service.
    /// </summary>
    /// <param name="services">Service collection that receives the registration.</param>
    public static void AddFunASRNanoExpand(this IServiceCollection services)
    {
        services.AddSingleton<FunASRNano>();
    }
}
|
||||
/// <summary>
/// Fun-ASR-Nano-2512 speech recognizer hosted through the sherpa-onnx runtime.
/// <para>Model version: Fun-ASR-Nano-2512</para>
/// <para>Source: https://github.com/modelscope/FunASR/blob/main/README_zh.md</para>
/// </summary>
public class FunASRNano
{
    /// <summary>
    /// Recognizer shared by every instance; it stays null until <see cref="Init"/> has run.
    /// </summary>
    public static OfflineRecognizer OR = default!;
    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// Creates the recognizer wrapper.
    /// </summary>
    /// <param name="redisManager">Not used by this class; kept so the constructor signature is unchanged.</param>
    /// <param name="serviceProvider">Used to resolve a <see cref="SherpaVad"/> for each recognition task.</param>
    public FunASRNano(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Builds the Fun-ASR-Nano recognizer configuration and stores the resulting recognizer in <see cref="OR"/>.
    /// </summary>
    /// <param name="numThreads">Number of threads handed to the model (default 6).</param>
    /// <param name="useGPU">
    /// True selects the "cuda" provider, false selects "cpu". If CUDA fails to load, see
    /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32
    /// </param>
    /// <param name="useHotwords">Currently not read by this method.</param>
    public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
    {
        Console.WriteLine("初始化 FunASRNano");
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();

        // Feature extraction: sample rate and feature dimension.
        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;

        // Every model file lives under this folder inside the AI model directory.
        var topFolder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-funasr-nano-fp16-2025-12-30");

        // Model files.
        config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
        config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
        config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
        config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");

        // Prompts and generation settings.
        config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
        config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!";
        config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
        config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
        config.ModelConfig.FunAsrNano.TopP = 0.8f;
        config.ModelConfig.FunAsrNano.Seed = 42;

        // Runtime settings.
        config.ModelConfig.ModelType = string.Empty;
        config.ModelConfig.NumThreads = numThreads;

        // Fix: the original switched to "cuda" when useGPU was FALSE, so the default (CPU) call
        // requested CUDA and an explicit useGPU: true stayed on the CPU.
        config.ModelConfig.Provider = useGPU ? "cuda" : "cpu";
#if DEBUG
        config.ModelConfig.Debug = 1;
#endif
        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Transcribes the audio in a stream into caption segments.
    /// </summary>
    /// <param name="s">Stream handed to <see cref="WaveReader"/>.</param>
    /// <returns>The caption segments produced by the VAD slicing service.</returns>
    /// <exception cref="Exception">Thrown when <paramref name="s"/> is null.</exception>
    public List<SenseVoiceRes> RunTask(Stream s)
    {
        if (s is null) throw new Exception("音频路径 is null");

        // Load the model on first use.
        if (OR is null) Init();

        return serviceProvider.GetRequiredService<SherpaVad>()
            .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.ten_vad_324);
    }

    /// <summary>
    /// Transcribes the "task.wav" file found in the local folder of a task.
    /// </summary>
    /// <param name="task">Task id; its local folder is resolved through <c>task.LocalPath()</c>.</param>
    /// <returns>An already completed task.</returns>
    /// <exception cref="Exception">Thrown when the audio file does not exist.</exception>
    public Task RunTask(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), "task.wav");
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
            throw new Exception("task 音频路径未找到");

        // Load the model on first use.
        if (OR is null) Init();

        // NOTE(review): null is passed as the task id, so SherpaVad.TaskHandle skips its
        // "task id supplied" persistence branch — confirm this is intentional.
        serviceProvider.GetRequiredService<SherpaVad>()
            .TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.ten_vad_324);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Decodes one block of samples with the shared recognizer.
    /// </summary>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <param name="samples">Audio samples to decode.</param>
    /// <returns>The decoded stream; the caller reads its result and disposes it.</returns>
    public OfflineStream SoundHandle(int sampleRate, float[] samples)
    {
        var stream = OR.CreateStream();
        stream.AcceptWaveform(sampleRate, samples);
        OR.Decode(stream);
        return stream;
    }
}
|
||||
}
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
using Dm.util;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SherpaOnnx;
|
||||
using SqlSugar.IOC;
|
||||
|
|
@ -15,6 +14,7 @@ using System.Threading.Tasks;
|
|||
using VideoAnalysisCore.Common;
|
||||
using VideoAnalysisCore.Model;
|
||||
using VideoAnalysisCore.Model.Enum;
|
||||
using static System.Runtime.InteropServices.JavaScript.JSType;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||
{
|
||||
|
|
@ -32,17 +32,18 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
}
|
||||
public class SenseVoice
|
||||
{
|
||||
public static OfflineRecognizer OR = default!;
|
||||
private readonly IServiceProvider serviceProvider;
|
||||
//const string TransducerStr = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
|
||||
static OfflineRecognizer OR = default!;
|
||||
static OfflineRecognizer OR_old = default!;
|
||||
static VadModelConfig VADModelConfig = default!;
|
||||
public Repository<VideoTask> videoTaskDB { get; set; }
|
||||
|
||||
public static OfflineRecognizer OR1 = default!;
|
||||
//测试用
|
||||
public static List<(string z1,string z2)> cachedValue = new List<(string z1, string z2)>();
|
||||
private readonly RedisManager redisManager;
|
||||
|
||||
|
||||
public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
|
||||
public SenseVoice(Repository<VideoTask> videoTaskDB, RedisManager redisManager)
|
||||
{
|
||||
this.serviceProvider = serviceProvider;
|
||||
this.videoTaskDB = videoTaskDB;
|
||||
this.redisManager = redisManager;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
@ -60,9 +61,10 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
config.FeatConfig.FeatureDim = 80;
|
||||
// Path to tokens.txt
|
||||
var AIModelVersion_270717 = "sherpa-onnx-sense-voice-24-07-17";
|
||||
config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "tokens.txt");
|
||||
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
|
||||
config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
|
||||
//SenseVoice 模型
|
||||
config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "model.onnx");
|
||||
config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
|
||||
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
|
||||
config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
|
||||
//反转文本规范化规则 fst 的路径
|
||||
|
|
@ -89,34 +91,54 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
//config.MaxActivePaths =4;
|
||||
#endregion
|
||||
|
||||
#region 热词功能[无效]
|
||||
//if (false)
|
||||
//{
|
||||
// //热词目录
|
||||
// config.HotwordsFile = Path.Combine(AppCommon.AIModelFile, "Hotwords.txt");
|
||||
// config.DecodingMethod = "modified_beam_search";
|
||||
// //热词得分
|
||||
// config.HotwordsScore = 1.5f;
|
||||
|
||||
// config.ModelConfig.ModelingUnit = "cjkchar+bpe";
|
||||
// config.ModelConfig.BpeVocab = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "bpe.model");
|
||||
// config.ModelConfig.Transducer = new OfflineTransducerModelConfig()
|
||||
// {
|
||||
// Decoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "decoder-epoch-99-avg-1.onnx"),
|
||||
// Encoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "encoder-epoch-99-avg-1.onnx"),
|
||||
// Joiner = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "joiner-epoch-99-avg-1.onnx"),
|
||||
// };
|
||||
//}
|
||||
#endregion
|
||||
|
||||
|
||||
#if DEBUG
|
||||
config.ModelConfig.Debug = 1;
|
||||
#endif
|
||||
|
||||
OR = new OfflineRecognizer(config);
|
||||
|
||||
|
||||
OfflineRecognizerConfig oldConfig = new OfflineRecognizerConfig();
|
||||
//采样率
|
||||
oldConfig.FeatConfig.SampleRate = 16000;
|
||||
oldConfig.FeatConfig.FeatureDim = 80;
|
||||
oldConfig.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "tokens.txt");
|
||||
oldConfig.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "model.onnx");
|
||||
oldConfig.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
|
||||
//反转文本规范化规则 fst 的路径
|
||||
//config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");
|
||||
|
||||
oldConfig.ModelConfig.SenseVoice.Language = "zh";
|
||||
//模型类型
|
||||
oldConfig.ModelConfig.ModelType = string.Empty;
|
||||
oldConfig.ModelConfig.NumThreads = numThreads;
|
||||
oldConfig.ModelConfig.Provider = "cpu";
|
||||
OR_old = new OfflineRecognizer(oldConfig);
|
||||
|
||||
|
||||
|
||||
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
|
||||
OfflineRecognizerConfig config1 = new OfflineRecognizerConfig();
|
||||
config1.FeatConfig.SampleRate = 16000;
|
||||
config1.FeatConfig.FeatureDim = 80;
|
||||
config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
|
||||
config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
|
||||
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
|
||||
config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
|
||||
config1.ModelConfig.SenseVoice.Language = "zh";
|
||||
config1.ModelConfig.ModelType = string.Empty;
|
||||
config1.ModelConfig.NumThreads = numThreads;
|
||||
config1.ModelConfig.Provider = "cpu";
|
||||
config1.DecodingMethod = "greedy_search";
|
||||
config1.ModelConfig.Debug = 1;
|
||||
OR1 = new OfflineRecognizer(config: config1);
|
||||
//OR1 = FunASRNano.OR;
|
||||
|
||||
VADModelConfig = new VadModelConfig();
|
||||
VADModelConfig.SileroVad.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "silero_vad.onnx");
|
||||
VADModelConfig.Debug = 0;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
|
@ -124,44 +146,137 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
/// </summary>
|
||||
/// <param name="s"></param>
|
||||
/// <returns></returns>
|
||||
public List<SenseVoiceRes> RunTask(Stream s)
|
||||
public async Task<List<SenseVoiceRes>> RunTask(Stream s)
|
||||
{
|
||||
if (s is null) throw new Exception("音频路径 is null");
|
||||
if (OR is null) Init();
|
||||
return serviceProvider.GetRequiredService<SherpaVad>()
|
||||
.TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
|
||||
if (s is null)
|
||||
throw new Exception("音频路径 is null");
|
||||
return await TaskHandle(new WaveReader(s), null);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// 获取语音字幕
|
||||
/// </summary>
|
||||
/// <param name="task"></param>
|
||||
/// <returns></returns>
|
||||
public Task RunTask(string task)
|
||||
public async Task RunTask(string task)
|
||||
{
|
||||
var filePath = Path.Combine(task.LocalPath(), "task.wav");
|
||||
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
|
||||
throw new Exception("task 音频路径未找到");
|
||||
if (OR is null) Init();
|
||||
serviceProvider.GetRequiredService<SherpaVad>()
|
||||
.TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.silero_vad_v5);
|
||||
|
||||
return Task.CompletedTask;
|
||||
await TaskHandle(new WaveReader(filePath), task);
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// 获取语音字幕
|
||||
/// 任务处理
|
||||
/// </summary>
|
||||
/// <param name="sampleRate">采样率</param>
|
||||
/// <param name="samples">采样值(样品)</param>
|
||||
/// <returns>结果流</returns>
|
||||
public OfflineStream SoundHandle(int sampleRate, float[] samples)
|
||||
/// <param name="reader">Wave</param>
|
||||
/// <param name="task">任务id [默认Null]</param>
|
||||
/// <returns></returns>
|
||||
/// <exception cref="Exception"></exception>
|
||||
public async Task<List<SenseVoiceRes>> TaskHandle(WaveReader reader, string? task )
|
||||
{
|
||||
var stream = OR.CreateStream();
|
||||
stream.AcceptWaveform(sampleRate, samples);
|
||||
OR.Decode(stream);
|
||||
return stream;
|
||||
}
|
||||
if (OR is null)
|
||||
Init();
|
||||
int numSamples = reader.Samples.Length;
|
||||
int windowSize = VADModelConfig.SileroVad.WindowSize;
|
||||
int sampleRate = VADModelConfig.SampleRate;
|
||||
int numIter = numSamples / windowSize;
|
||||
var totalSecond = numSamples / (float)sampleRate;
|
||||
var res = new List<SenseVoiceRes>(500);
|
||||
using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 20);
|
||||
for (int i = 0; i != numIter; ++i)
|
||||
{
|
||||
int start = i * windowSize;
|
||||
float[] samples = new float[windowSize];
|
||||
Array.Copy(reader.Samples, start, samples, 0, windowSize);
|
||||
VAD.AcceptWaveform(samples);
|
||||
|
||||
//Memory<float> samples = new float[windowSize];
|
||||
//Memory<float> sourceSpan = reader.Samples.AsMemory(start, windowSize);
|
||||
//sourceSpan.CopyTo(samples);
|
||||
//VAD.AcceptWaveform(samples.ToArray());
|
||||
|
||||
//是否检测到语音
|
||||
if (VAD.IsSpeechDetected())
|
||||
{
|
||||
//获取最新的发言片段
|
||||
while (!VAD.IsEmpty())
|
||||
{
|
||||
var p = await ReadNext(VAD,res, totalSecond);
|
||||
if (p != null) redisManager.SetTaskProgress(task, p + "%");
|
||||
}
|
||||
}
|
||||
}
|
||||
VAD.Flush();
|
||||
while (!VAD.IsEmpty())
|
||||
{
|
||||
var p = await ReadNext(VAD, res, totalSecond);
|
||||
if(p!= null) redisManager.SetTaskProgress(task, p + "%");
|
||||
}
|
||||
//如果携带任务ID
|
||||
if (!string.IsNullOrEmpty(task))
|
||||
{
|
||||
await redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count);
|
||||
var captionsStr = res.ToJson();
|
||||
await videoTaskDB.AsUpdateable()
|
||||
.SetColumns(it => it.Captions == captionsStr)
|
||||
.Where(it => it.Id == long.Parse(task))
|
||||
.ExecuteCommandAsync();
|
||||
await redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
|
||||
//分析完成视频字幕后继续接收任务
|
||||
//redisManager.NewTask();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
/// <summary>
/// Decodes the segment at the front of the VAD queue, appends its caption to
/// <paramref name="res"/> and removes the segment from the queue.
/// </summary>
/// <param name="VAD">Detector whose front segment is processed and popped.</param>
/// <param name="res">List that receives the caption for this segment.</param>
/// <param name="totalSecond">Total audio length in seconds, used to compute the progress percentage.</param>
/// <returns>
/// Progress in percent (two decimals) when a caption was added; null when the segment
/// produced no text or only a single "。".
/// </returns>
public async Task<double?> ReadNext(VoiceActivityDetector VAD, List<SenseVoiceRes> res, float totalSecond)
{
    var segment = VAD.Front();
    var sampleRate = VADModelConfig.SampleRate;
    var sampleRateF = (float)VADModelConfig.SampleRate;
    float startTime = segment.Start / sampleRateF;
    float duration = segment.Samples.Length / sampleRateF;

    // Decode with the current model.
    using var stream = OR.CreateStream();
    stream.AcceptWaveform(sampleRate, segment.Samples);
    OR.Decode(stream);

    // Decode the same samples with the previous model for a side-by-side comparison.
    using var stream1 = OR_old.CreateStream();
    stream1.AcceptWaveform(sampleRate, segment.Samples);
    // Fix: the original decoded stream1 with OR, so the old model was never actually run.
    OR_old.Decode(stream1);

    var newText = stream.Result.Text;
    var oldText = stream1.Result.Text;
    if (newText != oldText)
    {
        Console.WriteLine("=>" + (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero));
        Console.WriteLine("新=>" + newText);
        Console.WriteLine("旧=>" + oldText);
    }
    Console.WriteLine();

    double? resP = null;
    if (!string.IsNullOrEmpty(newText))
    {
        // A segment that contains nothing but a full stop is dropped.
        if (newText.Trim() == "。")
        {
            VAD.Pop();
            return resP;
        }

        res.Add(new()
        {
            Text = newText,
            Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
            End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
        });
        resP = Math.Round((double)(startTime + duration) / (totalSecond) * 100, 2);
    }

    VAD.Pop();
    return resP;
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,210 +0,0 @@
|
|||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using SherpaOnnx;
|
||||
using SqlSugar;
|
||||
using SqlSugar.IOC;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using VideoAnalysisCore.Common;
|
||||
using VideoAnalysisCore.Model;
|
||||
using VideoAnalysisCore.Model.Enum;
|
||||
using static System.Net.WebRequestMethods;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||
{
|
||||
public static class SherpaVadExpand
{
    /// <summary>
    /// Registers the <see cref="SherpaVad"/> voice-slicing service with a transient lifetime.
    /// </summary>
    /// <param name="services">Service collection that receives the registration.</param>
    public static void AddSherpaVadExpand(this IServiceCollection services)
        => services.AddTransient<SherpaVad>();
}
|
||||
|
||||
/// <summary>
/// Model file names of the voice-activity-detection versions that can be selected.
/// </summary>
public class SherpaVadVersion
{
    /// <summary>Silero VAD v4 model file.</summary>
    public const string silero_vad_v4 = "silero_vad_v4.onnx";

    /// <summary>Silero VAD v5 model file.</summary>
    public const string silero_vad_v5 = "silero_vad_v5.onnx";

    /// <summary>ten-vad model file (the 324 KB build).</summary>
    public const string ten_vad_324 = "ten-vad.onnx";
}
|
||||
/// <summary>
/// Voice-slicing service: feeds audio through a voice activity detector and hands every
/// detected speech segment to a recognizer callback to build the caption list.
/// </summary>
public class SherpaVad
{
    // Shared by all instances and rebuilt by every constructor call.
    static VadModelConfig VADModelConfig = default!;

    private readonly RedisManager redisManager;
    private readonly IServiceProvider serviceProvider;

    // Recognizer callback for the current TaskHandle call (sample rate, samples) -> decoded stream.
    private Func<int, float[], OfflineStream> Callback;

    /// <summary>
    /// Creates the service and resets the shared VAD configuration to 16 kHz, one thread, CPU.
    /// </summary>
    /// <param name="redisManager">Receives progress updates and task logs.</param>
    /// <param name="serviceProvider">Used to resolve the <c>VideoTask</c> repository when captions are saved.</param>
    public SherpaVad(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.redisManager = redisManager;
        this.serviceProvider = serviceProvider;
        VADModelConfig = new VadModelConfig();

        VADModelConfig.SampleRate = 16000;
        VADModelConfig.NumThreads = 1;
        VADModelConfig.Provider = "cpu";
#if DEBUG
        VADModelConfig.Debug = 1;
#endif
        VADModelConfig.SileroVad = new SileroVadModelConfig();
        VADModelConfig.TenVad = new TenVadModelConfig();
    }

    /// <summary>
    /// Applies the per-task settings to the shared VAD configuration and stores the recognizer callback.
    /// </summary>
    /// <param name="func">Callback invoked for every detected speech segment.</param>
    /// <param name="vadVersion">One of the <see cref="SherpaVadVersion"/> file names; any other value leaves the model paths untouched.</param>
    /// <param name="numThreads">Number of threads for the VAD model (default 1).</param>
    /// <param name="useGPU">
    /// True selects the "cuda" provider, false selects "cpu". If CUDA fails to load, see
    /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32
    /// </param>
    private void Init(Func<int, float[], OfflineStream> func, string vadVersion = SherpaVadVersion.silero_vad_v5, int numThreads = 1, bool useGPU = false)
    {
        VADModelConfig.NumThreads = numThreads;
        VADModelConfig.Provider = useGPU ? "cuda" : "cpu";

        // Fix: the path used to be built from silero_vad_v5 no matter which version was requested,
        // so ten_vad_324 (and silero_vad_v4) were pointed at the Silero v5 file.
        string ModelPath() => Path.Combine(AppCommon.AIModelFile, "vad", vadVersion);

        switch (vadVersion)
        {
            case SherpaVadVersion.silero_vad_v4:
            case SherpaVadVersion.silero_vad_v5:
                VADModelConfig.SileroVad.Model = ModelPath();
                break;
            case SherpaVadVersion.ten_vad_324:
                VADModelConfig.TenVad.Model = ModelPath();
                break;
            default:
                break;
        }
        Callback = func;
    }

    /// <summary>
    /// Runs voice activity detection over the whole recording, decodes every speech segment through
    /// <paramref name="func"/> and, when a task id is supplied, stores the captions.
    /// </summary>
    /// <param name="reader">Wave data to process.</param>
    /// <param name="task">Task id, or null/empty to skip the log, database and Redis writes.</param>
    /// <param name="func">Callback invoked for every detected speech segment.</param>
    /// <param name="vadVersion">One of the <see cref="SherpaVadVersion"/> file names.</param>
    /// <param name="numThreads">Number of threads for the VAD model (default 1).</param>
    /// <param name="useGPU">True selects the "cuda" provider, false selects "cpu".</param>
    /// <returns>The caption segments in detection order.</returns>
    public List<SenseVoiceRes> TaskHandle(WaveReader reader, string? task, Func<int, float[], OfflineStream> func, string vadVersion = SherpaVadVersion.silero_vad_v5, int numThreads = 1, bool useGPU = false)
    {
        Init(func, vadVersion, numThreads, useGPU);

        ReadOnlySpan<float> allSamples = reader.Samples.AsSpan();
        int numSamples = allSamples.Length;
        // NOTE(review): the window size always comes from the Silero settings, even when a
        // ten-vad model is selected — confirm this is intended.
        int windowSize = VADModelConfig.SileroVad.WindowSize;
        int sampleRate = VADModelConfig.SampleRate;
        // Integer division: a trailing partial window is not fed to the detector.
        int numIter = numSamples / windowSize;
        var totalSecond = numSamples / (float)sampleRate;
        var res = new List<SenseVoiceRes>(500);

        using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 30);

        // One buffer is reused for every window instead of allocating inside the loop.
        float[] buffer = new float[windowSize];

        for (int i = 0; i != numIter; ++i)
        {
            int start = i * windowSize;
            allSamples.Slice(start, windowSize).CopyTo(buffer);
            VAD.AcceptWaveform(buffer);

            // Drain the finished segments while speech is being detected.
            if (VAD.IsSpeechDetected())
            {
                while (!VAD.IsEmpty())
                {
                    var p = ReadNext(VAD, res, totalSecond);
                    if (p != null) redisManager.SetTaskProgress(task, p + "%");
                }
            }
        }

        // Flush, then drain whatever segments are still queued.
        VAD.Flush();
        while (!VAD.IsEmpty())
        {
            var p = ReadNext(VAD, res, totalSecond);
            if (p != null) redisManager.SetTaskProgress(task, p + "%");
        }

        // Persist only when a task id was supplied. The three writes are started without being
        // awaited, so this method returns before they finish and does not observe their failures.
        if (!string.IsNullOrEmpty(task))
        {
            _ = redisManager.AddTaskLog(task, "==>字幕数量" + res.Count);
            var captionsStr = res.ToJson();
            _ = serviceProvider.GetRequiredService<Repository<VideoTask>>()
                .AsUpdateable()
                .SetColumns(it => it.Captions == captionsStr)
                .Where(it => it.Id == long.Parse(task))
                .ExecuteCommandAsync();
            _ = redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
            //redisManager.NewTask();
        }
        return res;
    }

    /// <summary>
    /// Decodes the segment at the front of the VAD queue, appends its caption to
    /// <paramref name="res"/> and removes the segment from the queue.
    /// </summary>
    /// <param name="VAD">Detector whose front segment is processed and popped.</param>
    /// <param name="res">List that receives the caption for this segment.</param>
    /// <param name="totalSecond">Total audio length in seconds, used to compute the progress percentage.</param>
    /// <returns>
    /// Progress in percent (two decimals) when a caption was added; null when the segment
    /// produced no text or only a single "。".
    /// </returns>
    public double? ReadNext(VoiceActivityDetector VAD, List<SenseVoiceRes> res, float totalSecond)
    {
        var segment = VAD.Front();
        var sampleRate = VADModelConfig.SampleRate;
        var sampleRateF = (float)VADModelConfig.SampleRate;
        float startTime = segment.Start / sampleRateF;
        float duration = segment.Samples.Length / sampleRateF;

        using var stream = Callback(sampleRate, segment.Samples);
        var rawText = stream.Result.Text;

        double? resP = null;
        if (!string.IsNullOrEmpty(rawText))
        {
            // A segment that contains nothing but a full stop is dropped.
            if (rawText.Trim() == "。")
            {
                VAD.Pop();
                return resP;
            }

            res.Add(new()
            {
                Text = rawText,
                Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
                End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
            });
            resP = Math.Round((double)(startTime + duration) / (totalSecond) * 100, 2);
        }

        VAD.Pop();
        return resP;
    }
}
|
||||
}
|
||||
|
|
@ -51,7 +51,7 @@ namespace VideoAnalysisCore.Common.Expand
|
|||
public static class AliyunOSSExpand
|
||||
{
|
||||
/// <summary>
|
||||
/// 使用阿里云 OSS拓展
|
||||
/// 使用阿里云 vod拓展
|
||||
/// </summary>
|
||||
/// <param name="service"></param>
|
||||
/// <returns></returns>
|
||||
|
|
|
|||
|
|
@ -221,11 +221,6 @@ namespace VideoAnalysisCore.Common.Expand
|
|||
/// </summary>
|
||||
public static class SSimpLetexExtensions
|
||||
{
|
||||
/// <summary>
|
||||
/// 公式图片识别
|
||||
/// </summary>
|
||||
/// <param name="services"></param>
|
||||
/// <returns></returns>
|
||||
public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
|
||||
{
|
||||
services.AddSingleton<SimpLetexClient>();
|
||||
|
|
|
|||
|
|
@ -19,10 +19,6 @@ namespace VideoAnalysisCore.Common.Expand
|
|||
public static class SqlSugarExpand
|
||||
{
|
||||
public static bool ShowSQL = false;
|
||||
/// <summary>
|
||||
/// 数据库ORM拓展
|
||||
/// </summary>
|
||||
/// <param name="services"></param>
|
||||
public static void AddSqlSugarExpand(this IServiceCollection services)
|
||||
{
|
||||
|
||||
|
|
|
|||
|
|
@ -88,10 +88,6 @@ namespace VideoAnalysisCore.Common
|
|||
Console.WriteLine($"{DateTime.Now}=>初始化 Redis任务队列");
|
||||
service.AddSingleton<RedisInit>();
|
||||
}
|
||||
/// <summary>
|
||||
/// redis连接拓展(包含消息队列任务)
|
||||
/// </summary>
|
||||
/// <param name="service"></param>
|
||||
public static void AddRedisExpand(this IServiceCollection service)
|
||||
{
|
||||
Console.WriteLine($"{DateTime.Now}=>初始化 Redis");
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ namespace VideoAnalysisCore.Controllers
|
|||
private readonly Repository<VideoTask> videoTaskDB;
|
||||
private readonly Repository<CourseInfo> courseInfoDB;
|
||||
private readonly Repository<VideoKonwPoint> videoKonwPointDB;
|
||||
private readonly Repository<VideoTaskStage> videoTaskStageDB;
|
||||
private readonly Repository<NodePackageInfo> nodePackageInfoDB;
|
||||
private readonly Repository<VideoQuestion> videoQuestionDB;
|
||||
private readonly Repository<VideoQuestionKonw> videoQuestionKonwDB;
|
||||
|
|
@ -43,7 +42,7 @@ namespace VideoAnalysisCore.Controllers
|
|||
|
||||
public LJZK_Controller(IMapper mp, Repository<NodeSubscription> nodesubscriptionDB,
|
||||
Repository<VideoTask> videoTaskDB = null, Repository<VideoKonwPoint> videoKonwPointDB = null
|
||||
, Repository<NodePackageInfo> nodePackageInfoDB = null, Repository<VideoQuestion> videoQuestionDB = null, Repository<VideoQuestionKonw> videoQuestionKonwDB = null, Repository<CourseInfo> courseInfoDB = null, RedisManager redisManager = null, Repository<VideoTaskStage> videoTaskStageDB = null)
|
||||
, Repository<NodePackageInfo> nodePackageInfoDB = null, Repository<VideoQuestion> videoQuestionDB = null, Repository<VideoQuestionKonw> videoQuestionKonwDB = null, Repository<CourseInfo> courseInfoDB = null, RedisManager redisManager = null)
|
||||
{
|
||||
this.mp = mp;
|
||||
this.nodesubscriptionDB = nodesubscriptionDB;
|
||||
|
|
@ -54,7 +53,6 @@ namespace VideoAnalysisCore.Controllers
|
|||
this.videoQuestionKonwDB = videoQuestionKonwDB;
|
||||
this.courseInfoDB = courseInfoDB;
|
||||
this.redisManager = redisManager;
|
||||
this.videoTaskStageDB = videoTaskStageDB;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -203,49 +201,27 @@ namespace VideoAnalysisCore.Controllers
|
|||
.ToArrayAsync();
|
||||
if (konwArr is null || konwArr.Length == 0)
|
||||
return BadRequest("ÎÞÓÐЧÈÎÎñ·Ö¶Î");
|
||||
|
||||
|
||||
var stageArr = await videoTaskStageDB.AsQueryable()
|
||||
.Where(s => s.VideoTaskId == task.Id)
|
||||
.ToArrayAsync();
|
||||
var videoKnowDic = konwArr
|
||||
.GroupBy(s => s.StageId)
|
||||
.ToDictionary(s => s.Key);
|
||||
var videoKnows = stageArr
|
||||
.Select(s => new VideoKnowRes()
|
||||
{
|
||||
Content = s.Content,
|
||||
StartTime = s.StartTime,
|
||||
EndTime = s.EndTime,
|
||||
Theme = s.Theme,
|
||||
StageId = s.Id,
|
||||
KnowPoint = videoKnowDic.ContainsKey(s.Id)
|
||||
? string.Join(',', videoKnowDic[s.Id].Select(x => x.KnowPoint))
|
||||
: string.Empty
|
||||
}).ToArray();
|
||||
|
||||
var res = new TaskKnowRes()
|
||||
{
|
||||
TagId = task.TagId,
|
||||
Status = task.LastEnum,
|
||||
VideoTaskId = task.Id,
|
||||
KnowBlockArr = stageArr
|
||||
KnowBlockArr = konwArr
|
||||
.GroupBy(s => s.StartTime)
|
||||
.Select(s => new TaskKnowBlock()
|
||||
{
|
||||
Id = s.Id,
|
||||
Content = s.Content,
|
||||
StartTime = s.StartTime,
|
||||
StageId = s.Id,
|
||||
EndTime = s.EndTime,
|
||||
Theme = s.Theme,
|
||||
Know = videoKnowDic.ContainsKey(s.Id)
|
||||
? videoKnowDic[s.Id]?.Select(x => new TaskKnowInfo()
|
||||
Id = s.First().Id,
|
||||
Content = s.First().Content,
|
||||
StartTime = s.First().StartTime,
|
||||
StageId = s.First().StageId,
|
||||
EndTime = s.First().EndTime,
|
||||
Theme = s.First().Theme,
|
||||
Know = s.Select(x => new TaskKnowInfo()
|
||||
{
|
||||
Id = x.Id,
|
||||
KnowPoint = x.KnowPoint,
|
||||
KnowPointId = x.KnowPointId,
|
||||
})?.ToArray()
|
||||
: null
|
||||
KnowPointId = x.KnowPointId
|
||||
}).ToArray()
|
||||
}).ToArray()
|
||||
};
|
||||
if (task.VideoType == AttachmentsInfoType.¸´Ï°)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ using MapsterMapper;
|
|||
using Microsoft.AspNetCore.Authorization;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.AspNetCore.Mvc;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using SqlSugar;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
|
@ -33,19 +32,15 @@ namespace VideoAnalysisCore.Controllers
|
|||
readonly Repository<VideoTask> baseService;
|
||||
readonly Repository<VideoQuestion> videoQuestionDB;
|
||||
readonly Repository<VideoKonwPoint> videoKonwPointDB;
|
||||
readonly Repository<VideoTaskStage> videoTaskStageDB;
|
||||
readonly Repository<VideoQuestionKonw> videoQuestionKonwDB;
|
||||
readonly Repository<TaskLog> taskLogDB;
|
||||
|
||||
|
||||
readonly RedisManager redisManager;
|
||||
public readonly SenseVoice senseVoice;
|
||||
public readonly FunASRNano funASRNano;
|
||||
|
||||
private readonly IMapper mp;
|
||||
public VideoTaskController(Repository<VideoTask> baseService, RedisManager redisManager,
|
||||
Repository<VideoQuestion> videoQuestionDB,
|
||||
Repository<VideoQuestionKonw> videoQuestionKonwDB, Repository<VideoKonwPoint> videoKonwPointDB, SenseVoice senseVoice, IMapper mp, Repository<TaskLog> taskLogDB, FunASRNano funASRNano, Repository<VideoTaskStage> videoTaskStageDB) : base(baseService)
|
||||
Repository<VideoQuestionKonw> videoQuestionKonwDB, Repository<VideoKonwPoint> videoKonwPointDB, SenseVoice senseVoice, IMapper mp, Repository<TaskLog> taskLogDB) : base(baseService)
|
||||
{
|
||||
this.baseService = baseService;
|
||||
this.redisManager = redisManager;
|
||||
|
|
@ -55,8 +50,6 @@ namespace VideoAnalysisCore.Controllers
|
|||
this.senseVoice = senseVoice;
|
||||
this.mp = mp;
|
||||
this.taskLogDB = taskLogDB;
|
||||
this.funASRNano = funASRNano;
|
||||
this.videoTaskStageDB = videoTaskStageDB;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -136,7 +129,7 @@ namespace VideoAnalysisCore.Controllers
|
|||
using HttpClient client = new HttpClient();
|
||||
// 发送GET请求获取网络文件流
|
||||
using var networkStream = await client.GetStreamAsync(url);
|
||||
var res = senseVoice.RunTask(networkStream);
|
||||
var res = await senseVoice.RunTask(networkStream);
|
||||
return Ok(res);
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
|
@ -150,34 +143,13 @@ namespace VideoAnalysisCore.Controllers
|
|||
/// <param name="file">文件流</param>
|
||||
/// <returns></returns>
|
||||
[HttpPost(Name = "AudioRecognition")]
|
||||
public IActionResult AudioRecognition(IFormFile file)
|
||||
public async Task<IActionResult> AudioRecognition(IFormFile file)
|
||||
{
|
||||
using var s = file.OpenReadStream();
|
||||
senseVoice.RunTask(s);
|
||||
return Ok();
|
||||
var res = await senseVoice.RunTask(s);
|
||||
return Ok(res);
|
||||
}
|
||||
/// <summary>
/// Speech-recognition test endpoint: initializes FunASRNano, runs SenseVoice on the uploaded
/// file and prints every cached caption pair to the console.
/// </summary>
/// <param name="file">Uploaded audio file.</param>
/// <returns>An empty OK result.</returns>
[HttpPost(Name = "AudioRecognition_test")]
public IActionResult AudioRecognition_test(IFormFile file)
{
    using var audio = file.OpenReadStream();

    var funAsr = AppCommon.Services.GetService<FunASRNano>();
    funAsr.Init();
    senseVoice.RunTask(audio);

    // Print the cached (SenseVoice, FunASR) text pairs one index at a time.
    var index = 0;
    while (index < SenseVoice.cachedValue.Count())
    {
        var pair = SenseVoice.cachedValue[index];
        Console.WriteLine($"字幕索引=>{index}");
        Console.WriteLine($"ssv=>{pair.z1}");
        Console.WriteLine($"fun=>{pair.z2}");
        Console.WriteLine();
        index++;
    }
    return Ok();
}
|
||||
|
||||
/// <summary>
|
||||
/// 获取FTS_Data str
|
||||
|
|
@ -373,23 +345,17 @@ namespace VideoAnalysisCore.Controllers
|
|||
var konwArr = await videoKonwPointDB.AsQueryable()
|
||||
.Where(s => s.VideoTaskId == nowTask.Id)
|
||||
.ToArrayAsync();
|
||||
var stageArr = await videoTaskStageDB.AsQueryable()
|
||||
.Where(s => s.VideoTaskId == nowTask.Id)
|
||||
.ToArrayAsync();
|
||||
var videoKnowDic = konwArr
|
||||
.GroupBy(s => s.StageId)
|
||||
.ToDictionary(s => s.Key);
|
||||
var videoKnows = stageArr
|
||||
|
||||
var videoKnows = konwArr
|
||||
.GroupBy(s => s.StartTime)
|
||||
.Select(s => new VideoKnowRes()
|
||||
{
|
||||
Content = s.Content,
|
||||
StartTime = s.StartTime,
|
||||
EndTime = s.EndTime,
|
||||
Theme = s.Theme,
|
||||
StageId = s.Id,
|
||||
KnowPoint = videoKnowDic.ContainsKey(s.Id)
|
||||
? string.Join(',', videoKnowDic[s.Id].Select(x => x.KnowPoint))
|
||||
: string.Empty
|
||||
Content = s.First().Content,
|
||||
StartTime = s.First().StartTime,
|
||||
EndTime = s.First().EndTime,
|
||||
Theme = s.First().Theme,
|
||||
StageId = s.First().StageId,
|
||||
KnowPoint = string.Join(',', s.Select(x => x.KnowPoint))
|
||||
}).ToArray();
|
||||
if (nowTask.VideoType == AttachmentsInfoType.复习)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -65,18 +65,9 @@ namespace VideoAnalysisCore.Model
|
|||
/// </summary>
|
||||
public string? KnowPointId { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点占比权重
|
||||
/// 内容总结
|
||||
/// </summary>
|
||||
public float? KnowPointWeight { get; set; }
|
||||
/// <summary>
|
||||
/// 知识点来源
|
||||
/// </summary>
|
||||
public string KnowSource { get; set; }
|
||||
/// <summary>
|
||||
/// 内容总结[不写入数据库]
|
||||
/// </summary>
|
||||
[SugarColumn(IsIgnore = true)]
|
||||
public virtual string? Content { get; set; }
|
||||
public string? Content { get; set; }
|
||||
/// <summary>
|
||||
/// 课程阶段
|
||||
/// </summary>
|
||||
|
|
@ -88,11 +79,5 @@ namespace VideoAnalysisCore.Model
|
|||
/// </summary>
|
||||
[SugarColumn(IsNullable = true)]
|
||||
public long? CloudSchoolId { get; set; }
|
||||
/// <summary>
|
||||
/// 教材来源
|
||||
/// <para> 课本/试卷/挹青苑 ...</para>
|
||||
/// </summary>
|
||||
[SugarColumn(IsIgnore = true)]
|
||||
public virtual string? TextbookSource { get; set; }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,78 +0,0 @@
|
|||
using SqlSugar;
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
using System.Net;
|
||||
using System.Text.Json;
|
||||
using UserCenter.Model.Enum;
|
||||
using VideoAnalysisCore.AICore.GPT.Dto;
|
||||
using VideoAnalysisCore.AICore.SherpaOnnx;
|
||||
using VideoAnalysisCore.Model.Enum;
|
||||
using VideoAnalysisCore.Model.Interface;
|
||||
using Whisper.net;
|
||||
|
||||
namespace VideoAnalysisCore.Model
{
    /// <summary>
    /// Video segment (stage) belonging to a video-analysis task.
    /// </summary>
    [SugarTable("videotaskstage")]
    public class VideoTaskStage : IDB
    {
        /// <summary>
        /// Primary key.
        /// </summary>
        [SugarColumn(IsPrimaryKey = true)]
        public long Id { get; set; }

        /// <summary>
        /// Id of the video task this segment belongs to.
        /// <see cref="VideoTask.Id"/>
        /// </summary>
        public long VideoTaskId { get; set; }

        /// <summary>
        /// Custom id [custom id of the task's video].
        /// <see cref="VideoTask.TagId"/>
        /// </summary>
        [SugarColumn(Length = 500, IsNullable = true)]
        public string? TagId { get; set; }

        /// <summary>
        /// Start time of the segment.
        /// </summary>
        [SugarColumn(IsNullable = true)]
        public float? StartTime { get; set; }

        /// <summary>
        /// End time of the segment.
        /// </summary>
        [SugarColumn(IsNullable = true)]
        public float? EndTime { get; set; }

        /// <summary>
        /// Duration of the segment: <see cref="EndTime"/> minus <see cref="StartTime"/>,
        /// with a missing bound treated as 0. Marked as ignored by the ORM.
        /// </summary>
        /// <remarks>
        /// Both operands are parenthesised on purpose: binary <c>-</c> binds tighter than
        /// <c>??</c>, so the unparenthesised form evaluated as
        /// <c>((EndTime ?? 0) - StartTime) ?? 0</c> and yielded 0 whenever
        /// <see cref="StartTime"/> was null.
        /// </remarks>
        [SugarColumn(IsIgnore = true)]
        public float? KeepTime => (EndTime ?? 0f) - (StartTime ?? 0f);

        /// <summary>
        /// Theme of the segment.
        /// </summary>
        public string? Theme { get; set; }

        /// <summary>
        /// Knowledge-point source: video second and the reason for the source.
        /// </summary>
        public string Content { get; set; }

        /// <summary>
        /// Course stage. Marked as ignored by the ORM.
        /// </summary>
        [SugarColumn(IsIgnore = true)]
        public virtual StageEnum? Stage { get; set; }

        /// <summary>
        /// Id of the cloud school that owns the video.
        /// <para><see cref="UserCenter.Model.CloudSchool"/> cloud-school id from the user center.</para>
        /// </summary>
        [SugarColumn(IsNullable = true)]
        public long? CloudSchoolId { get; set; }

        /// <summary>
        /// Teaching-material source.
        /// <para>Textbook / exam paper / 挹青苑 ...</para>
        /// </summary>
        [SugarColumn(IsNullable = true)]
        public virtual string? TextbookSource { get; set; }
    }
}
|
||||
|
|
@ -71,7 +71,7 @@
|
|||
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.21" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.20" />
|
||||
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
|
||||
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
|
||||
<PackageReference Include="SqlSugarCore" Version="5.1.4.205" />
|
||||
|
|
|
|||
Loading…
Reference in New Issue