Compare commits

...

6 Commits

20 changed files with 927 additions and 272 deletions

View File

@ -10,6 +10,11 @@ namespace Learn.VideoAnalysis.Expand
{
public static class AuthorizeExpand
{
/// <summary>
/// 框架API授权
/// </summary>
/// <param name="services"></param>
/// <returns></returns>
public static IServiceCollection AddPermissionAuthentication(this IServiceCollection services)
{
services.AddAuthentication()

View File

@ -38,25 +38,28 @@ namespace Learn.VideoAnalysis
loggingBuilder.SetMinimumLevel(LogLevel.Warning); // 设置最小日志级别为 Warning
});
//绑定 appsetting 配置
builder.Configuration.AddAppConfig(args);
//初始化 插件
builder.Services.AddEndpointsApiExplorer();
builder.Services.AddSwaggerExpand("AI视频分析");
//绑定 appsetting 配置
builder.Configuration.AddAppConfig(args);
builder.Services.AddPermissionAuthentication();
builder.Services.AddSqlSugarExpand();
builder.Services.AddRedisExpand();
//工作流
builder.Services.AddSimpleTexOcrClient();
builder.Services.AddDownloadFileExpand();
builder.Services.AddFFMPGEExpand();
builder.Services.AddAlibabaCloudVod();
builder.Services.AddAliyunOSS();
//语音转写
builder.Services.AddSenseVoiceExpand();
builder.Services.AddFunASRNanoExpand();
builder.Services.AddSherpaVadExpand();
//builder.Services.AddSpeakerAI();
//定时任务
builder.Services.AddCoravel();
//SenseVoice.Init();
//异常过滤器
builder.Services.AddControllersWithViews(options =>
{

View File

@ -22,7 +22,7 @@ namespace VideoAnalysisCore.AICore.FFMPGE
public static class FFMPGEExpand
{
/// <summary>
/// 添加跨域拓展
/// 添加FFMPEG拓展
/// </summary>
/// <param name="services"></param>
public static void AddFFMPGEExpand(this IServiceCollection services)

View File

@ -42,7 +42,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 16000)
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title, string model =null, int max_tokens = 32000)
{
Message[] messageArr = [
new Message(postMessages,"user"),
@ -53,7 +53,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
taskId = task,
title = title,
model = model ?? ChatGPTType.Deepseek_Reasoner,
max_tokens = model == ChatGPTType.Deepseek_Reasoner ? 16000 : max_tokens,
max_tokens = model == ChatGPTType.Deepseek_Reasoner ? 32000 : max_tokens,
stream = true,
temperature = 0.2f,
messages = messageArr

View File

@ -32,6 +32,13 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
public virtual string? Content { get; set; }
}
public class VideoKnowPointDto
{
    /// <summary>
    /// Weight of this knowledge point within its segment.
    /// NOTE(review): the prompt at the call site appears to request a value in (0, 1] — confirm.
    /// </summary>
    public float KnowPointWeight { get; set; }
    /// <summary>Knowledge-point name (知识点).</summary>
    public string KnowPoint { get; set; }
    /// <summary>Knowledge-point identifier, as provided in the `Id|Name` list sent to the model.</summary>
    public string KnowPointId { get; set; }
    /// <summary>Source/evidence text for the knowledge point (知识点来源), produced by the model.</summary>
    public string KnowSource { get; set; }
}
public class VideoKnowRes
{
/// <summary>
@ -50,10 +57,18 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
public virtual long? StageId { get; set; }
public virtual VideoQuestionShowDto[]? QuestionArr { get; set; }
/// <summary>
/// 知识点列表
/// </summary>
public virtual VideoKnowPointDto[]? KnowPoints { get; set; }
/// <summary>
/// 知识点
/// </summary>
public virtual string? KnowPoint { get; set; }
/// <summary>
/// 知识点权重
/// </summary>
public virtual float? KnowPointWeight { get; set; }
/// <summary>
/// 知识点ID
/// </summary>
public virtual string? KnowPointId { get; set; }
@ -65,6 +80,11 @@ namespace VideoAnalysisCore.AICore.GPT.Dto
/// 内容总结
/// </summary>
public virtual string? Content { get; set; }
/// <summary>
/// 教材来源
/// <para> 课本/试卷/挹青苑 ...</para>
/// </summary>
public virtual string? TextbookSource { get; set; }
}
public class FileNameInfo

View File

@ -13,6 +13,8 @@ using System.IO;
using VideoAnalysisCore.AICore.GPT.ChatGPT;
using System.Threading.Tasks;
using System.Text.Json;
using FFmpeg.NET.Services;
using NetTaste;
namespace VideoAnalysisCore.AICore.GPT
{
@ -76,7 +78,7 @@ namespace VideoAnalysisCore.AICore.GPT
{
throw new Exception("请求GPT服务器失败次数过多");
}
goto PostJsonStream;
goto PostJsonStream;
}
using var stream = chatResp.Content.ReadAsStream();
using var reader = new StreamReader(stream, Encoding.UTF8);
@ -87,7 +89,7 @@ namespace VideoAnalysisCore.AICore.GPT
var splitCount = "data:".Length;
var maxLoop = 50 * 200;
int threshold = 0;
var startTime= DateTime.Now;
var startTime = DateTime.Now;
var endTime = startTime.AddHours(1.5);
//最长分析分析时间1.5小时 或者重试读取 1w次
while (maxLoop > 0 && DateTime.Now < endTime)
@ -156,7 +158,7 @@ namespace VideoAnalysisCore.AICore.GPT
/// <param name="max_tokens">最大token <para>不设置默认最大值 16000/8000</para></param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
public async Task<T> ChatAsync<T>(ChatRequest chatRep)
public async Task<T> ChatAsync<T>(ChatRequest chatRep) where T:class,new()
{
var tryCount = 10;
while (tryCount-- > 0)
@ -180,14 +182,20 @@ namespace VideoAnalysisCore.AICore.GPT
chatResContent = chatResContent?.Replace("```", "");
chatResContent = chatResContent?.Replace("}{", "},{");
chatResContent = chatResContent?.Replace("}|{", "},{");
chatResContent = chatResContent?.Trim();
chatResContent = chatResContent?.Trim();
chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault();
if (string.IsNullOrEmpty(chatResContent))
throw new Exception($"GPT返回结果无有效JSON =>{chatResp?.res}");
var startsStr = typeof(T).IsArray ? "[" : "{";
var endStr = typeof(T).IsArray ? "]" : "}";
var startsStr = "{";
var endStr = "}";
var resT = new T();
if (resT is Array || resT is System.Collections.IList || resT is System.Collections.IList)
{
startsStr = "[";
endStr = "]";
}
if (!chatResContent.StartsWith(startsStr))
chatResContent = startsStr + chatResContent;
if (!chatResContent.EndsWith(endStr))
@ -271,7 +279,7 @@ namespace VideoAnalysisCore.AICore.GPT
/// <returns></returns>
/// <exception cref="Exception"></exception>
public virtual Task<T> ChatAsync<T>(string task, string postMessages, string title,
string model = null, int max_tokens = 16000)
string model = null, int max_tokens = 16000) where T : class, new()
{
throw new Exception("需要实现");
}

View File

@ -22,6 +22,7 @@ using System.Collections.Generic;
using UserCenter.Model.Enum;
using Dm.filter;
using System.Text.RegularExpressions;
using System.Diagnostics;
namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{
@ -37,6 +38,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
private readonly RedisManager redisManager;
private readonly Repository<VideoTask> videoTaskDB;
private readonly Repository<VideoKonwPoint> videoKonwPointDB;
private readonly Repository<VideoTaskStage> videoTaskStageDB;
private readonly Repository<VideoQuestion> videoQuestionDB;
private readonly Repository<VideoQuestionKonw> videoQuestionKonwDB;
private readonly Repository<KnowledgeInfo> knowledgeInfoDB;
@ -49,7 +51,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
/// <param name="logger"></param>
public GTP_Analysis_1(DeepSeekGPTClient moonshotClient, Repository<CourseGradingCriteria> criteria, Repository<VideoTask> videoTaskDB,
Repository<KnowledgeInfo> knowledgeInfoDB, Repository<VideoKonwPoint> videoKonwPointDB, SimpLetexClient simpLetexClient,
Repository<VideoQuestion> videoQuestionDB, OssClient ossClient, Repository<VideoQuestionKonw> videoQuestionKonwDB, RedisManager redisManager, BestAIClient chatGPTClient, GeminiGPTClient geminiClient)
Repository<VideoQuestion> videoQuestionDB, OssClient ossClient, Repository<VideoQuestionKonw> videoQuestionKonwDB, RedisManager redisManager, BestAIClient chatGPTClient, GeminiGPTClient geminiClient, Repository<VideoTaskStage> videoTaskStageDB)
{
deepSeekClient = moonshotClient;
criteriaDB = criteria;
@ -63,12 +65,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
this.redisManager = redisManager;
this.chatGPTClient = chatGPTClient;
this.geminiClient = geminiClient;
this.videoTaskStageDB = videoTaskStageDB;
}
/// <summary>
/// 获取分段内容对应的章节知识点
/// </summary>
/// <returns></returns>
private async Task<List<VideoKonwPoint>> GetVideoKnow(VideoKnowRes[] questionRes, VideoTask taskInfo,
private async Task<List<VideoKonwPoint>> GetVideoKnow(List<VideoKnowRes> questionRes, VideoTask taskInfo,
string sections, List<KnowledgeInfo> knowledgeInfos)
{
var knows = string.Join(',', knowledgeInfos.Select(s => s.Id + "|" + s.Name));
@ -77,30 +80,44 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
.GroupBy(s => s.Name)
.ToDictionary(s => s.First().Name, s => s.First().Id);
questionRes = questionRes.Where(s => s != null)
.OrderBy(s => s.StartTime).ToArray();
.OrderBy(s => s.StartTime).ToList();
var thems = questionRes.Adapt<VideoKnowQueryDto[]>().ToJson();
var checkResFormat1 = """[{"StartTime":开始秒(number),"KnowPoint":知识点名称(string),"KnowPointId":知识点Id(string)}]""";
var checkResFormat1 = """[{"StartTime":12.3,"TextbookSource":"","KnowPoints":[{"KnowPointWeight":0.5,"KnowSource":"(),","KnowPoint":"","KnowPointId":"123"}]}]""";
var knowMessages =
$"我针对{taskInfo.Subject}课堂授课视频分析出了视频的授课阶段片段。\n" +
$"现在需要你通过每个片段的内容总结来分配正确的知识点(单个片段允许多个知识点用逗号','分割)。\n" +
$"这是我的分段 {thems}。\n" +
$"课堂内容与{sections}章节相关\n" +
$"最后请确保分配的知识点是用户提供的,并且一定正确合理!\n" +
$"返回的片段数量与传入片段数量一致(硬性条件)!\n" +
$"输出内容只返回json格式({checkResFormat1})\n" +
$" 格式 (方法点Id|方法点名称) \n" +
$"提供的`知识点名称({knows})。\n";
$"""
{taskInfo.Subject}
- TextbookSource///
- KnowPoints
- KnowPoint
- KnowPointIdID KnowPoint
- KnowPointWeight(1,1)
- KnowSource,(50)
1) //
2) StartTime StartTime
3) KnowPoints
4) JSON Markdown//
{thems}
{sections}
Id|Name{knows}
{checkResFormat1}
""";
await redisManager.AddTaskLog(taskInfo.Id, "==>2.开始分析视频内容知识点");
VideoKnowRes[] konwRes;
List<VideoKnowRes> konwRes;
var knowOK = false;
var chatClentArr = new GPTClient[] { chatGPTClient, geminiClient,deepSeekClient };
var chatClentArr = new GPTClient[] { chatGPTClient, geminiClient, deepSeekClient };
for (int i = 0; i < 3; i++)
{
konwRes = await chatClentArr[i].ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), knowMessages, "知识点");
konwRes = await chatClentArr[i].ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), knowMessages, "知识点");
// 分析结果的片段数量与预期不匹配
if (questionRes.Length != konwRes.Length) continue;
if (questionRes.Count() != konwRes.Count()) continue;
for (int xi = 0; xi < konwRes.Count(); xi++)
questionRes[xi].KnowPoint = konwRes[xi].KnowPoint;
{
questionRes[xi].KnowPoints = konwRes[xi].KnowPoints;
questionRes[xi].TextbookSource = konwRes[xi].TextbookSource;
}
knowOK = true;
break;
}
@ -111,13 +128,12 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
}
return questionRes
.Where(s => !string.IsNullOrEmpty(s.KnowPoint))
.Where(s => s.KnowPoints != null && s.KnowPoints.Length > 0)
.SelectMany(
s =>
{
var ks = s.KnowPoint.Split(",").Distinct();
var StageId = Yitter.IdGenerator.YitIdHelper.NextId();
return ks.Where(x => knowDic.ContainsKey(x))
return s.KnowPoints.Where(x => knowDic.ContainsKey(x.KnowPoint))
.Select(x => new VideoKonwPoint()
{
Content = s.Content,
@ -125,8 +141,11 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
StartTime = s.StartTime,
EndTime = s.EndTime,
StageId = StageId,
KnowPoint = x,
KnowPointId = knowDic[x].ToString(),
KnowPoint = x.KnowPoint,
KnowPointWeight=x.KnowPointWeight,
TextbookSource = s.TextbookSource,
KnowSource = x.KnowSource,
KnowPointId = knowDic[x.KnowPoint].ToString(),
TagId = taskInfo.TagId,
VideoTaskId = taskInfo.Id,
CloudSchoolId = taskInfo.CloudSchoolId,
@ -183,29 +202,28 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
/// 检查AI切片结果质量
/// </summary>
/// <returns></returns>
private async Task<CheckMessageDto> VerifySpanQuality(VideoKnowRes[] questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, long course_Id)
private async Task<CheckMessageDto> VerifySpanQuality(List<VideoKnowRes> questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, long course_Id)
{
//校验结果质量
var thems = questionRes.Adapt<VideoKnowQueryDto[]>().ToJson();
var pptFormat = taskInfo.VideoType == AttachmentsInfoType.
? "这堂课是习题课,所讲解内容几乎都是试题。"
: string.Empty;
var checkResFormat = """{"Score":打分(number),"MinusScore":简洁的扣分原因(string)",Suggestion":改进建议(string)""";//,"Data":优化后的分段(array)}""";
var checkResFormat = """{"Score":65.5,"MinusScore":"","Suggestion":""}""";
var checkMessage =
$"""
40
{pptFormat}
{sections}
Content是否与对应时间段内的字幕文本内容匹配(,)
Theme/Conten匹配,()
Conten有关联()
()
0-10070
0-10070,,
MinusScore:
Suggestion: ()
{thems}
:::|{captions.Captions}
@ -214,6 +232,50 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
""";
return await chatGPTClient.ChatAsync<CheckMessageDto>(taskInfo.Id.ToString(), checkMessage, "结果检查");
}
/// <summary>
/// Applies a reviewer suggestion: sends the current segmentation back to the model
/// with the suggestion and asks for an improved segmentation.
/// NOTE(review): several prompt literals below look corrupted (non-ASCII text stripped
/// during an encoding mishap) — restore them from version control before relying on them.
/// </summary>
/// <param name="questionRes">Current segmentation result to improve.</param>
/// <param name="taskInfo">Video task being analyzed (supplies VideoType and Id).</param>
/// <param name="captions">Full caption text sent to the model as context.</param>
/// <param name="sections">Chapter/section context string.</param>
/// <param name="suggestion">Improvement suggestion; blank/whitespace skips the round-trip.</param>
/// <returns>
/// Improved segments ordered by StartTime, or null when the suggestion is blank,
/// the model returns nothing, or the segment count no longer matches the input.
/// </returns>
private async Task<List<VideoKnowRes>?> ImproveSpanBySuggestion(List<VideoKnowRes> questionRes, VideoTask taskInfo, TotalCaptionsDto captions, string sections, string suggestion)
{
    // Nothing to improve without a concrete suggestion.
    if (string.IsNullOrWhiteSpace(suggestion))
        return null;
    var thems = questionRes.ToJson();
    // NOTE(review): the enum member after `AttachmentsInfoType.` was stripped by the
    // same encoding loss — it is presumably the "exercise class" member; confirm.
    var pptFormat = taskInfo.VideoType == AttachmentsInfoType.
        ? "这堂课是习题课,所讲解内容几乎都是试题。"
        : string.Empty;
    var resFormat = """[{"StartTime":0.0,"EndTime":12.3,"Theme":"","Content":"","KnowPoint":"()"}]""";
    var message =
        $"""
        使
        {pptFormat}
        {sections}
        1)
        2) StartTime
        3) Content
        4)
        5) JSON
        {thems}
        {suggestion}
        :::|{captions.Captions}
        JSON{resFormat}
        """;
    // Single improvement round via the Gemini client only.
    var improved = await geminiClient.ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), message, "分段优化");
    // Reject results where the model changed the number of segments.
    if (improved is null || improved.Count() != questionRes.Count())
        return null;
    return improved.OrderBy(s => s.StartTime ?? 0).ToList();
}
/// <summary>
/// 优化字幕
@ -222,7 +284,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
private async Task<SenseVoiceRes[]> OptimizeSubtitles(VideoTask taskInfo,
SenseVoiceRes[] captionsArr, string sections)
{
if (!string.IsNullOrEmpty(taskInfo.CaptionsAI))
if (!string.IsNullOrEmpty(taskInfo.CaptionsAI) && taskInfo.CaptionsAI!="[]")
return JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.CaptionsAI);
var subject = taskInfo.Subject.ToString();
var newCaptionsList = new List<SenseVoiceRes>(captionsArr.Length);
@ -230,7 +292,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var totalCount = captionsArr.Length / spanCount + 1;
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化");
var chatClentArr = new GPTClient[] { deepSeekClient,chatGPTClient, geminiClient };
var chatClentArr = new GPTClient[] { deepSeekClient, chatGPTClient, geminiClient };
await Parallel.ForAsync(0, totalCount,
new ParallelOptions() { MaxDegreeOfParallelism = 1 },
async (s, c) =>
@ -257,10 +319,10 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
$"待优化字幕内容:\n" +
$"{nowCaptionStr}\n" +
$"最终核对:请确保输出 JSON 中包含的字幕条数与输入的字幕条数完全对应。";
string[]? resData = null;
List<string>? resData = null;
for (int i = 0; i < 3; i++)
{
resData = await chatClentArr[i].ChatAsync<string[]>(taskInfo.Id.ToString(), postMessages, "优化字幕", ChatGPTType.Deepseek_Chat, 8000);
resData = await chatClentArr[i].ChatAsync<List<string>>(taskInfo.Id.ToString(), postMessages, "优化字幕", ChatGPTType.Deepseek_Chat, 8000);
if (resData.Count() == cArr.Count())
break;
else
@ -269,7 +331,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
if (resData.Count() != cArr.Count())
{
resData = cStrArr.ToArray();
resData = cStrArr.ToList();
await redisManager.AddTaskLog(taskInfo.Id, $"==>字幕优化 分段{s} AI结果数量不匹配 采用原始值");
}
newCaptionsList.AddRange(resData.Select((text, i) => new SenseVoiceRes()
@ -293,7 +355,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
/// 视频AI分析字幕
/// </summary>
/// <returns></returns>
private async Task<VideoKnowRes[]> Analytics(VideoTask taskInfo,
private async Task<List<VideoKnowRes>> Analytics(VideoTask taskInfo,
TotalCaptionsDto captions, string sections)
{
var tryCount = 10;
@ -306,32 +368,30 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
? $"请分析授课中字幕描述的知识内容,然后基于视频整体知识点讲解提炼出不同的阶段以便对老师上课内容切片提取为知识库,所以请确保阶段的内容准确性"
: $"授课中老师的PPT在这些时间段内进行了切换{taskInfo.PPTKeyFrame},理应这些时间段内的讲述内容也发生了变化,请你基于PPT变化时间点结合字幕描述的知识内容提炼出不同的切片。" +
$"每个阶段的起始和结束应接近这些时间点(例如,以时间点为中心,扩展至内容自然过渡处)。";
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]""";
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":阶段主题(string),"Content":内容总结(string)}]""";
var reviewStr = taskInfo?.VideoType == AttachmentsInfoType.
? $"但本堂课是习题课,所以大部分阶段是不同的例题讲解内容。\n"
: string.Empty;
var postMessages = string.Empty;
postMessages =
$"请通过视频字幕内容分析出视频中课堂的授课知识点切片\n" +
$"阶段的细分程度到某个知识点的讲解/认识/例题/总结\n" +
$"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。\n" +
$"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。\n" +
reviewStr +
$"讲解知识内容的阶段的细分程度到某个知识点的讲解/认识/例题/总结\n" +
$"不分析课堂作业相关的内容我已经预处理了\n" +
$"初步划分阶段:{keyFrameStr}\n" +
$"\n" +
$"内容分析:对每个时间段,提取主要讲解内容:识别关键词(如“例题”“证明”“练习”“总结”)和内容结构。\n" +
$"判断阶段类型:如果内容以解题为主,归类为“例题精讲”;如果涉及新知识讲解,归类为“新知讲解”;以此类推。\n" +
$"内容总结简述该阶段的核心讲解内容70~200字,确保内容与阶段时间内授课内容符合。\n" +
$"阶段主题:基于内容总结,提炼一个恰当的主题(例如,“柯西不等式的基本应用”)。\n" +
$"输出要求:确保阶段划分合理、无` 重叠,且时长符合要求,并且每个阶段的时长需要超过60秒如果时长不够去考虑合并到相邻的阶段\n" +
$"Stage判断阶段类型如果内容以解题为主归类为“例题精讲”如果涉及新知识讲解归类为“新知讲解”以此类推。\n" +
$"Content简述单个阶段的核心讲解内容40~150字如“例题”“证明”“练习”“总结”..., 必须完全基于字幕文本可推断的信息,禁止捏造不存在的内容(硬性条件)。\n" +
$"Theme理解Content提炼一个精确的主题例如“柯西不等式的基本应用”。\n" +
$"输出要求:确保阶段划分合理、无重叠、\n" +
$"作业布置阶段一般出现在末尾如果有" +
$"输出格式要求内容只返回json格式({resFormat})\n" +
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。\n" +
$"字幕列表 {captions.Captions} 字幕结束!";
await redisManager.AddTaskLog(taskInfo.Id, $"开始分析视频内容 {tryCount}");
//return await chatGPTClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
var res = await geminiClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
//var r2 = await chatClient.ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
var res = await geminiClient.ChatAsync<List<VideoKnowRes>>(taskInfo.Id.ToString(), postMessages, "分析字幕");
return res;
}
catch (Exception ex)
@ -342,6 +402,122 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
return null;
}
/// <summary>
/// Detects a homework-assignment stage by sending the tail of the captions
/// (last 80 caption entries) to DeepSeek and asking for a single stage object.
/// NOTE(review): several prompt literals below look corrupted (non-ASCII text stripped
/// during an encoding mishap) — restore them from version control before relying on them.
/// </summary>
/// <param name="taskInfo">Video task being analyzed (supplies VideoType and Id).</param>
/// <param name="captions">Full caption text; entries are '|'-separated.</param>
/// <param name="sections">Chapter/section context string.</param>
/// <returns>
/// The detected homework stage, or null when captions are empty, the model returned
/// nothing, the returned Stage is not "作业布置", or its Content is blank.
/// </returns>
private async Task<VideoKnowRes?> DetectHomeworkAssignment(VideoTask taskInfo, TotalCaptionsDto captions, string sections)
{
    if (captions is null || string.IsNullOrWhiteSpace(captions.Captions))
        return null;
    // Captions are joined with '|'; split them back into individual entries.
    var parts = captions.Captions
        .Split('|', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    if (parts.Length == 0)
        return null;
    // Only the last 80 entries are relevant — homework is assigned near the end of class.
    var tail = string.Join('|', parts.Skip(Math.Max(0, parts.Length - 80)));
    var resFormat = """{"StartTime":123.4,"EndTime":456.7,"Stage":"|","Theme":"","Content":"()"}""";
    // NOTE(review): the enum member after `AttachmentsInfoType.` was stripped — confirm.
    var pptFormat = taskInfo.VideoType == AttachmentsInfoType.
        ? "这堂课是习题课,作业可能是布置练习/订正/讲义整理。"
        : string.Empty;
    var message =
        $"""
        80 VideoKnowRes
        {pptFormat}
        {sections}
        ////////////
        1)
        2) Stage Content
        3) Stage Theme
        4) StartTime/EndTime
        4) JSON
        50 :::|
        {tail}
        JSON{resFormat}
        """;
    var res = await deepSeekClient.ChatAsync<VideoKnowRes>(taskInfo.Id.ToString(), message, "作业布置识别", ChatGPTType.Deepseek_Chat, 8000);
    if (res is null)
        return null;
    // Accept only a result explicitly labeled as the homework-assignment stage.
    if (!string.Equals(res.Stage, "作业布置", StringComparison.OrdinalIgnoreCase))
        return null;
    if (string.IsNullOrWhiteSpace(res.Content))
        return null;
    return res;
}
/// <summary>
/// Appends the detected homework stage to the segment list, clamping its time window
/// so it does not overlap the last existing segment and stays within the video length.
/// If any existing segment already mentions "作业" (homework) in its Stage or Theme,
/// the list is returned unchanged.
/// </summary>
/// <param name="segments">Segments produced by analysis; null is treated as empty.</param>
/// <param name="homeworkStage">Detected homework stage; null means nothing to merge.</param>
/// <param name="maxVideoTime">Total video length in seconds; 0 or less means unknown.</param>
/// <returns>Segments ordered by start time, with the homework stage appended when applicable.</returns>
private VideoKnowRes[] MergeHomeworkStage(VideoKnowRes[] segments, VideoKnowRes homeworkStage, float maxVideoTime)
{
    if (homeworkStage is null)
        return segments;
    if (segments is null)
        segments = [];
    var ordered = segments
        .Where(s => s != null)
        .OrderBy(s => s.StartTime ?? 0)
        .ToList();
    // A homework stage already exists — don't add a duplicate.
    if (ordered.Any(s =>
        (!string.IsNullOrWhiteSpace(s.Stage) && s.Stage.Contains("作业")) ||
        (!string.IsNullOrWhiteSpace(s.Theme) && s.Theme.Contains("作业"))))
        return ordered.ToArray();
    var end = homeworkStage.EndTime ?? maxVideoTime;
    // No usable end time (and video length unknown) — cannot place the stage.
    if (end <= 0)
        return ordered.ToArray();
    // Missing start defaults to 120 s before the end, clamped at 0.
    var start = homeworkStage.StartTime ?? Math.Max(0, end - 120);
    // Degenerate window (< 1 s): widen to at least 30 s.
    if (end - start < 1)
    {
        start = Math.Max(0, end - 30);
        if (end - start < 1)
            end = start + 30;
    }
    // Clamp to the known video length.
    if (maxVideoTime > 0 && end > maxVideoTime)
        end = maxVideoTime;
    if (ordered.Count > 0)
    {
        var last = ordered[^1];
        var lastStart = last.StartTime ?? 0;
        var lastEnd = last.EndTime ?? lastStart;
        // Gap of more than 40 s to the previous segment: snap start to its end.
        if (start - lastEnd > 40)
            start = lastEnd;
        // Keep strict ordering: start must be after the previous segment's start.
        if (start <= lastStart)
            start = lastStart + 0.01f;
        // Adjustments may have pushed start past end — re-open a 30 s window, re-clamped.
        if (start >= end)
        {
            end = start + 30;
            if (maxVideoTime > 0 && end > maxVideoTime)
                end = maxVideoTime;
        }
        // Trim the previous segment so it ends where the homework stage begins.
        if (last.EndTime is null || last.EndTime > start)
            last.EndTime = start;
    }
    var homeworkContent = homeworkStage.Content;
    // Fall back to a default theme ("课后作业布置") when the model gave none.
    var homeworkTheme = string.IsNullOrWhiteSpace(homeworkStage.Theme) ? "课后作业布置" : homeworkStage.Theme;
    ordered.Add(new VideoKnowRes()
    {
        StartTime = start,
        EndTime = end,
        Stage = "作业布置",
        Theme = homeworkTheme,
        Content = homeworkContent
    });
    return ordered.ToArray();
}
@ -411,7 +587,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
$"输出内容只返回json格式为({resFormat})" +
$"以下是试题内容" +
$"`{sRes.Result.res.value}`";
var resData = await deepSeekClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
var resData = await deepSeekClient.ChatAsync<List<VideoQuestionOSSDto>>(taskInfo.Id.ToString(), postMessages, "提取试题");
//var resData = await chatClient.ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题");
if (resData is null || resData.Count() == 0)
break;
@ -524,25 +700,74 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
captionsArr = await OptimizeSubtitles(taskInfo, captionsArr, sections);
//合并字幕
var captions = ExpandFunction.GetSpeakerCaptions(captionsArr);
var homework = await DetectHomeworkAssignment(taskInfo, captions, sections);
if (homework != null)
{
await redisManager.AddTaskLog(taskInfo.Id, $"==>识别到作业布置 {homework.Content}");
await redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Homework", homework);
}
var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End ?? 0;
VideoKnowRes[]? questionRes = null;
List<VideoKnowRes>? questionRes = null;
var tryCount = 20;
while (tryCount-- > 0)
{
//视频字幕分析
questionRes = await Analytics(taskInfo, captions, sections);
if (questionRes is null) continue;
//处理分段 知识点
var insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);
List<VideoKonwPoint> insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);
//校验结果质量
var checkRes = await VerifySpanQuality(questionRes, taskInfo, captions, sections, Course_Id);
await redisManager.AddTaskLog(taskInfo.Id, $"==>课堂内容AI分析结果 得分=>{checkRes.Score}");
await redisManager.AddTaskLog(taskInfo.Id, $"==>改进意见 {checkRes.Suggestion}");
await redisManager.AddTaskLog(taskInfo.Id, $"==>扣分原因 {checkRes.MinusScore}");
// 质量复检
if (checkRes != null)
{
var improved = await ImproveSpanBySuggestion(questionRes, taskInfo, captions, sections, "扣分原因 {checkRes.MinusScore} \n 改进意见 {checkRes.Suggestion}");
if (improved != null)
{
var improvedCheck = await VerifySpanQuality(improved, taskInfo, captions, sections, Course_Id);
await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后复检得分=>{improvedCheck.Score}");
await redisManager.AddTaskLog(taskInfo.Id, $"==>优化后扣分原因 {improvedCheck.MinusScore}");
if (checkRes != null && checkRes.Score >= 85)
if (improvedCheck != null && improvedCheck.Score >= 90 && improvedCheck.Score > checkRes.Score)
{
questionRes = improved;
if (homework != null && (!questionRes.Any(s => s.Stage == StageEnum..ToString())))
questionRes.Add(homework);
insertData = await GetVideoKnow(questionRes, taskInfo, sections, knowledgeInfos);
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
await videoTaskStageDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);
var tStage = insertData.GroupBy(s => s.StageId).Select(s => new VideoTaskStage
{
Id = s.Key,
TagId = s.First().TagId,
CloudSchoolId = s.First().CloudSchoolId,
StartTime = s.First().StartTime,
EndTime = s.First().EndTime,
Content = s.First().Content,
TextbookSource = s.First().TextbookSource,
Stage = s.First().Stage,
Theme = s.First().Theme,
VideoTaskId = taskInfo.Id,
}).ToArray();
await videoTaskStageDB.InsertRangeAsync(tStage);
await videoKonwPointDB.InsertRangeAsync(insertData);
break;
}
else
{
await redisManager.AddTaskLog(taskInfo.Id, $"==>优化之后的得分降低/得分过低");
continue;
}
}
}
if (checkRes != null && checkRes.Score >= 90)
{
//写入知识点
await videoKonwPointDB.DeleteAsync(s => s.VideoTaskId == taskInfo.Id);

View File

@ -42,7 +42,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
/// <returns></returns>
/// <exception cref="Exception"></exception>
public override async Task<T> ChatAsync<T>(string task, string postMessages, string title,
string model = null, int max_tokens = 16000)
string model = null, int max_tokens = 32_000)
{
Message[] messageArr = [
new Message(postMessages,"user"),
@ -57,7 +57,8 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT
max_tokens = max_tokens,
stream = true,
temperature = 0.2f,
messages = messageArr
messages = messageArr,
max_completion_tokens= 12288,
};
chatReq.modalities = null;

View File

@ -0,0 +1,134 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar.IOC;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
public static class FunASRNanoExpand
{
    /// <summary>
    /// Registers the <see cref="FunASRNano"/> speech-to-text service as a singleton.
    /// (The original comment said "SenseVoice", but this extension registers FunASRNano.)
    /// </summary>
    /// <param name="services">The DI service collection to register into.</param>
    public static void AddFunASRNanoExpand(this IServiceCollection services)
    {
        services.AddSingleton<FunASRNano>();
    }
}
/// <summary>
/// Fun-ASR-Nano-2512 speech-to-text recognizer hosted on the sherpa-onnx runtime.
/// <para>Model version: Fun-ASR-Nano-2512</para>
/// <para>Source: https://github.com/modelscope/FunASR/blob/main/README_zh.md</para>
/// </summary>
public class FunASRNano
{
    // Process-wide recognizer instance; created lazily by Init().
    public static OfflineRecognizer OR = default!;
    private readonly IServiceProvider serviceProvider;

    /// <summary>
    /// DI constructor.
    /// </summary>
    /// <param name="redisManager">Currently unused; kept so the DI signature stays stable.</param>
    /// <param name="serviceProvider">Used to resolve the <c>SherpaVad</c> helper per task.</param>
    public FunASRNano(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.serviceProvider = serviceProvider;
    }

    /// <summary>
    /// Initializes the shared FunASRNano offline recognizer.
    /// </summary>
    /// <param name="numThreads">Inference thread count; defaults to 6.</param>
    /// <param name="useGPU">Run inference on CUDA when true. On errors, check the CUDA
    /// environment setup <see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/>.</param>
    /// <param name="useHotwords">Currently unused; reserved for future hotword support.</param>
    public void Init(int numThreads = 6, bool useGPU = false, bool useHotwords = false)
    {
        Console.WriteLine("初始化 FunASRNano");
        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        // Sample rate the model expects.
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension used to train the model.
        config.FeatConfig.FeatureDim = 80;
        var topFolder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-funasr-nano-fp16-2025-12-30");
        // Encoder adaptor: maps unstructured audio features to low-dimensional dense vectors.
        config.ModelConfig.FunAsrNano.EncoderAdaptor = Path.Combine(topFolder, "encoder_adaptor.int8.onnx");
        // Attached large language model.
        config.ModelConfig.FunAsrNano.LLM = Path.Combine(topFolder, "llm.fp16.onnx");
        // Small trainable adapter module inserted into the pretrained model.
        config.ModelConfig.FunAsrNano.Embedding = Path.Combine(topFolder, "embedding.int8.onnx");
        // Tokenizer directory.
        config.ModelConfig.FunAsrNano.Tokenizer = Path.Combine(topFolder, "Qwen3-0.6B");
        // LLM prompts.
        config.ModelConfig.FunAsrNano.SystemPrompt = "You are a professional video audio transcription assistant.";
        config.ModelConfig.FunAsrNano.UserPrompt = "这是一堂中国的课堂视频音频,请你帮我分析出它讲述的内容!";
        config.ModelConfig.FunAsrNano.MaxNewTokens = 512;
        config.ModelConfig.FunAsrNano.Temperature = 1E-06f;
        config.ModelConfig.FunAsrNano.TopP = 0.8f;
        config.ModelConfig.FunAsrNano.Seed = 42;
        // Model type left empty.
        config.ModelConfig.ModelType = string.Empty;
        config.ModelConfig.NumThreads = numThreads;
        config.ModelConfig.Provider = "cpu";
        // BUGFIX: the original condition was inverted (`if (!useGPU)`), which selected the
        // CUDA provider when the caller asked for CPU and vice versa. Use CUDA only when
        // GPU inference was explicitly requested.
        if (useGPU)
            config.ModelConfig.Provider = "cuda";
#if DEBUG
        config.ModelConfig.Debug = 1;
#endif
        OR = new OfflineRecognizer(config);
    }

    /// <summary>
    /// Transcribes an audio stream into caption segments via VAD + offline recognition.
    /// </summary>
    /// <param name="s">WAV audio stream (presumably 16 kHz to match the model — confirm).</param>
    /// <returns>Caption segments produced by <c>SherpaVad.TaskHandle</c>.</returns>
    public List<SenseVoiceRes> RunTask(Stream s)
    {
        if (s is null) throw new Exception("音频路径 is null");
        // Lazily initialize the shared recognizer with default settings.
        if (OR is null) Init();
        return serviceProvider.GetRequiredService<SherpaVad>()
            .TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.ten_vad_324);
    }

    /// <summary>
    /// Transcribes the task's local "task.wav" file.
    /// </summary>
    /// <param name="task">Task identifier; resolved to a folder via <c>LocalPath()</c>.</param>
    /// <returns>A completed task (the work itself runs synchronously).</returns>
    public Task RunTask(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), "task.wav");
        if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
            throw new Exception("task 音频路径未找到");
        if (OR is null) Init();
        serviceProvider.GetRequiredService<SherpaVad>()
            .TaskHandle(new WaveReader(filePath), null, SoundHandle, SherpaVadVersion.ten_vad_324);
        return Task.CompletedTask;
    }

    /// <summary>
    /// Decodes one VAD-produced chunk of audio samples with the shared recognizer.
    /// </summary>
    /// <param name="sampleRate">Sample rate of <paramref name="samples"/>.</param>
    /// <param name="samples">PCM samples for a single speech chunk.</param>
    /// <returns>The decoded offline stream; results are read by the caller.</returns>
    public OfflineStream SoundHandle(int sampleRate, float[] samples)
    {
        var stream = OR.CreateStream();
        stream.AcceptWaveform(sampleRate, samples);
        OR.Decode(stream);
        return stream;
    }
}
}

View File

@ -1,4 +1,5 @@
using Microsoft.Extensions.DependencyInjection;
using Dm.util;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar.IOC;
@ -14,7 +15,6 @@ using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;
using static System.Runtime.InteropServices.JavaScript.JSType;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
@ -32,22 +32,21 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
}
public class SenseVoice
{
//const string TransducerStr = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
static OfflineRecognizer OR = default!;
static OfflineRecognizer OR_old = default!;
static VadModelConfig VADModelConfig = default!;
public Repository<VideoTask> videoTaskDB { get; set; }
public static OfflineRecognizer OR = default!;
private readonly IServiceProvider serviceProvider;
private readonly RedisManager redisManager;
public static OfflineRecognizer OR1 = default!;
//测试用
public static List<(string z1,string z2)> cachedValue = new List<(string z1, string z2)>();
public SenseVoice(Repository<VideoTask> videoTaskDB, RedisManager redisManager)
public SenseVoice(RedisManager redisManager, IServiceProvider serviceProvider)
{
this.videoTaskDB = videoTaskDB;
this.redisManager = redisManager;
this.serviceProvider = serviceProvider;
}
/// <summary>
/// 初始化 SenseVoice
/// 初始化 SenseVoice
/// </summary>
/// <param name="numThreads">默认6线程</param>
/// <param name="useGPU">是否使用gpu 报错请看安装CUDA环境<see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/></param>
@ -61,10 +60,9 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
config.FeatConfig.FeatureDim = 80;
// Path to tokens.txt
var AIModelVersion_270717 = "sherpa-onnx-sense-voice-24-07-17";
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "tokens.txt");
//SenseVoice 模型
config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "model.onnx");
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
//反转文本规范化规则 fst 的路径
@ -91,54 +89,34 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
//config.MaxActivePaths =4;
#endregion
#region []
//if (false)
//{
// //热词目录
// config.HotwordsFile = Path.Combine(AppCommon.AIModelFile, "Hotwords.txt");
// config.DecodingMethod = "modified_beam_search";
// //热词得分
// config.HotwordsScore = 1.5f;
// config.ModelConfig.ModelingUnit = "cjkchar+bpe";
// config.ModelConfig.BpeVocab = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "bpe.model");
// config.ModelConfig.Transducer = new OfflineTransducerModelConfig()
// {
// Decoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "decoder-epoch-99-avg-1.onnx"),
// Encoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "encoder-epoch-99-avg-1.onnx"),
// Joiner = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "joiner-epoch-99-avg-1.onnx"),
// };
//}
#endregion
#if DEBUG
config.ModelConfig.Debug = 1;
#endif
OR = new OfflineRecognizer(config);
OfflineRecognizerConfig oldConfig = new OfflineRecognizerConfig();
//采样率
oldConfig.FeatConfig.SampleRate = 16000;
oldConfig.FeatConfig.FeatureDim = 80;
oldConfig.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "tokens.txt");
oldConfig.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "model.onnx");
oldConfig.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
//反转文本规范化规则 fst 的路径
//config.RuleFsts = Path.Combine(AppCommon.AIModelFile, "itn_subject_sx.fst");
oldConfig.ModelConfig.SenseVoice.Language = "zh";
//模型类型
oldConfig.ModelConfig.ModelType = string.Empty;
oldConfig.ModelConfig.NumThreads = numThreads;
oldConfig.ModelConfig.Provider = "cpu";
OR_old = new OfflineRecognizer(oldConfig);
VADModelConfig = new VadModelConfig();
VADModelConfig.SileroVad.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_270717, "silero_vad.onnx");
VADModelConfig.Debug = 0;
var AIModelVersion_251217 = "sherpa-onnx-sense-voice-funasr-nano-2025-12-17";
OfflineRecognizerConfig config1 = new OfflineRecognizerConfig();
config1.FeatConfig.SampleRate = 16000;
config1.FeatConfig.FeatureDim = 80;
config1.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "tokens.txt");
config1.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, AIModelVersion_251217, "model.onnx");
//1 使用逆文本规范化处理感官语音 [控制标点符号生成]。
config1.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
config1.ModelConfig.SenseVoice.Language = "zh";
config1.ModelConfig.ModelType = string.Empty;
config1.ModelConfig.NumThreads = numThreads;
config1.ModelConfig.Provider = "cpu";
config1.DecodingMethod = "greedy_search";
config1.ModelConfig.Debug = 1;
OR1 = new OfflineRecognizer(config: config1);
//OR1 = FunASRNano.OR;
}
/// <summary>
@ -146,137 +124,44 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
/// </summary>
/// <param name="s"></param>
/// <returns></returns>
public async Task<List<SenseVoiceRes>> RunTask(Stream s)
public List<SenseVoiceRes> RunTask(Stream s)
{
if (s is null)
throw new Exception("音频路径 is null");
return await TaskHandle(new WaveReader(s), null);
if (s is null) throw new Exception("音频路径 is null");
if (OR is null) Init();
return serviceProvider.GetRequiredService<SherpaVad>()
.TaskHandle(new WaveReader(s), null, SoundHandle, SherpaVadVersion.silero_vad_v5);
}
/// <summary>
/// 获取语音字幕
/// </summary>
/// <param name="task"></param>
/// <returns></returns>
public async Task RunTask(string task)
public Task RunTask(string task)
{
var filePath = Path.Combine(task.LocalPath(), "task.wav");
if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
throw new Exception("task 音频路径未找到");
await TaskHandle(new WaveReader(filePath), task);
if (OR is null) Init();
serviceProvider.GetRequiredService<SherpaVad>()
.TaskHandle(new WaveReader(filePath), task, SoundHandle, SherpaVadVersion.silero_vad_v5);
return Task.CompletedTask;
}
/// <summary>
/// 任务处理
/// 获取语音字幕
/// </summary>
/// <param name="reader">Wave</param>
/// <param name="task">任务id [默认Null]</param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
public async Task<List<SenseVoiceRes>> TaskHandle(WaveReader reader, string? task )
/// <param name="sampleRate">采样率</param>
/// <param name="samples">采样值(样品)</param>
/// <returns>结果流</returns>
public OfflineStream SoundHandle(int sampleRate, float[] samples)
{
if (OR is null)
Init();
int numSamples = reader.Samples.Length;
int windowSize = VADModelConfig.SileroVad.WindowSize;
int sampleRate = VADModelConfig.SampleRate;
int numIter = numSamples / windowSize;
var totalSecond = numSamples / (float)sampleRate;
var res = new List<SenseVoiceRes>(500);
using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 20);
for (int i = 0; i != numIter; ++i)
{
int start = i * windowSize;
float[] samples = new float[windowSize];
Array.Copy(reader.Samples, start, samples, 0, windowSize);
VAD.AcceptWaveform(samples);
//Memory<float> samples = new float[windowSize];
//Memory<float> sourceSpan = reader.Samples.AsMemory(start, windowSize);
//sourceSpan.CopyTo(samples);
//VAD.AcceptWaveform(samples.ToArray());
//是否检测到语音
if (VAD.IsSpeechDetected())
{
//获取最新的发言片段
while (!VAD.IsEmpty())
{
var p = await ReadNext(VAD,res, totalSecond);
if (p != null) redisManager.SetTaskProgress(task, p + "%");
}
}
}
VAD.Flush();
while (!VAD.IsEmpty())
{
var p = await ReadNext(VAD, res, totalSecond);
if(p!= null) redisManager.SetTaskProgress(task, p + "%");
}
//如果携带任务ID
if (!string.IsNullOrEmpty(task))
{
await redisManager.AddTaskLog(task, "==> SenseVoice 字幕数量" + res.Count);
var captionsStr = res.ToJson();
await videoTaskDB.AsUpdateable()
.SetColumns(it => it.Captions == captionsStr)
.Where(it => it.Id == long.Parse(task))
.ExecuteCommandAsync();
await redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
//分析完成视频字幕后继续接收任务
//redisManager.NewTask();
}
return res;
}
/// <summary>
/// 处理vad 下一个切片
/// </summary>
/// <param name="VAD"></param>
/// <param name="res">字幕处理后写入数组</param>
/// <param name="totalSecond">总时长</param>
/// <param name="progressCallback">任务回调</param>
/// <returns></returns>
public async Task<double?> ReadNext(VoiceActivityDetector VAD, List<SenseVoiceRes> res, float totalSecond)
{
var segment = VAD.Front();
var sampleRate = VADModelConfig.SampleRate;
var sampleRateF = (float)VADModelConfig.SampleRate;
float startTime = segment.Start / sampleRateF;
float duration = segment.Samples.Length / sampleRateF;
using var stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
var stream = OR.CreateStream();
stream.AcceptWaveform(sampleRate, samples);
OR.Decode(stream);
//old
using var stream1 = OR_old.CreateStream();
stream1.AcceptWaveform(sampleRate, segment.Samples);
OR.Decode(stream1);
if (stream.Result.Text != stream1.Result.Text)
{
Console.WriteLine("=>" + (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero));
Console.WriteLine("新=>" + stream.Result.Text);
Console.WriteLine("旧=>" + stream1.Result.Text);
}
Console.WriteLine();
double? resP =null;
if (!string.IsNullOrEmpty(stream.Result.Text))
{
var text = stream.Result.Text.Trim();
if (text.Length == 1 && text == "。")// 检查字符是否只有一个句号
{
VAD.Pop();
return resP;
}
res.Add(new()
{
Text = stream.Result.Text,
Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
});
resP = Math.Round((double)(startTime + duration) / (totalSecond) * 100, 2);
}
VAD.Pop();
return resP;
return stream;
}
}
}

View File

@ -0,0 +1,210 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using SherpaOnnx;
using SqlSugar;
using SqlSugar.IOC;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using VideoAnalysisCore.Common;
using VideoAnalysisCore.Model;
using VideoAnalysisCore.Model.Enum;
using static System.Net.WebRequestMethods;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
public static class SherpaVadExpand
{
    /// <summary>
    /// Registers the VAD (voice-activity-detection) speech-segmentation service
    /// <see cref="SherpaVad"/> with transient lifetime.
    /// </summary>
    /// <param name="services">The DI service collection to register into.</param>
    public static void AddSherpaVadExpand(this IServiceCollection services)
        => services.AddTransient<SherpaVad>();
}
/// <summary>
/// Model-file names for the supported VAD (voice-activity-detection) versions.
/// Each constant is the ONNX model filename resolved against the model directory.
/// </summary>
public class SherpaVadVersion
{
    /// <summary>silero VAD v4 model file.</summary>
    public const string silero_vad_v4 = "silero_vad_v4.onnx";
    /// <summary>silero VAD v5 model file (the default used by <see cref="SherpaVad"/> callers).</summary>
    public const string silero_vad_v5 = "silero_vad_v5.onnx";
    /// <summary>
    /// ten_vad model file (324 KB variant).
    /// </summary>
    public const string ten_vad_324 = "ten-vad.onnx";
}
/// <summary>
/// Speech-segmentation service: runs a VAD (voice-activity-detection) model over a
/// WAV sample buffer, hands each detected speech segment to a caller-supplied
/// recognizer callback, and collects the recognized captions with timestamps.
/// </summary>
public class SherpaVad
{
    // NOTE: previously a static field. The class is registered as Transient
    // (see SherpaVadExpand), so per-instance mutation of a shared static config
    // raced between concurrent instances. Kept per-instance instead.
    private readonly VadModelConfig VADModelConfig;
    private readonly RedisManager redisManager;
    private readonly IServiceProvider serviceProvider;
    // Recognizer callback set by Init: (sampleRate, samples) -> decoded stream.
    private Func<int, float[], OfflineStream> Callback;

    public SherpaVad(RedisManager redisManager, IServiceProvider serviceProvider)
    {
        this.redisManager = redisManager;
        this.serviceProvider = serviceProvider;

        VADModelConfig = new VadModelConfig();
        VADModelConfig.SampleRate = 16000;
        VADModelConfig.NumThreads = 1;
        VADModelConfig.Provider = "cpu";
#if DEBUG
        VADModelConfig.Debug = 1;
#endif
        VADModelConfig.SileroVad = new SileroVadModelConfig();
        VADModelConfig.TenVad = new TenVadModelConfig();
    }

    /// <summary>
    /// Configures the VAD model and stores the recognizer callback.
    /// </summary>
    /// <param name="func">Callback invoked for every detected speech segment.</param>
    /// <param name="vadVersion">Model file to load; one of <see cref="SherpaVadVersion"/>.</param>
    /// <param name="numThreads">Inference threads (default 1).</param>
    /// <param name="useGPU">Use CUDA provider when true; requires a CUDA install, see
    /// <see cref="https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/large-v3.html#run-with-gpu-float32"/>.</param>
    private void Init(Func<int, float[], OfflineStream> func, string vadVersion = SherpaVadVersion.silero_vad_v5, int numThreads = 1, bool useGPU = false)
    {
        VADModelConfig.NumThreads = numThreads;
        VADModelConfig.Provider = useGPU ? "cuda" : "cpu";
        // BUG FIX: the path previously hard-coded SherpaVadVersion.silero_vad_v5,
        // so requesting silero_vad_v4 or ten_vad_324 still loaded the v5 model file.
        // Build the path from the requested version instead.
        var path = Path.Combine(AppCommon.AIModelFile, "vad", vadVersion);
        switch (vadVersion)
        {
            case SherpaVadVersion.silero_vad_v4:
            case SherpaVadVersion.silero_vad_v5:
                VADModelConfig.SileroVad.Model = path;
                break;
            case SherpaVadVersion.ten_vad_324:
                VADModelConfig.TenVad.Model = path;
                break;
            default:
                break;
        }
        Callback = func;
    }

    /// <summary>
    /// Runs VAD over the whole wave, recognizes each speech segment via
    /// <paramref name="func"/>, and returns the collected captions. When
    /// <paramref name="task"/> is set, progress and the final caption list are
    /// written to Redis and the task row in the database.
    /// </summary>
    /// <param name="reader">Decoded WAV data.</param>
    /// <param name="task">Task id; null to skip persistence/progress reporting.</param>
    /// <param name="func">Recognizer callback invoked per speech segment.</param>
    /// <param name="vadVersion">VAD model version; see <see cref="SherpaVadVersion"/>.</param>
    /// <param name="numThreads">Inference threads (default 1).</param>
    /// <param name="useGPU">Use CUDA provider when true.</param>
    /// <returns>Recognized captions in playback order.</returns>
    public List<SenseVoiceRes> TaskHandle(WaveReader reader, string? task, Func<int, float[], OfflineStream> func, string vadVersion = SherpaVadVersion.silero_vad_v5, int numThreads = 1, bool useGPU = false)
    {
        Init(func, vadVersion, numThreads, useGPU);
        // Operate on the raw samples via a span to avoid copies.
        ReadOnlySpan<float> allSamples = reader.Samples.AsSpan();
        int numSamples = allSamples.Length;
        // NOTE(review): the window size always comes from the SileroVad config,
        // even when ten_vad is selected — confirm ten_vad uses the same window.
        int windowSize = VADModelConfig.SileroVad.WindowSize;
        int sampleRate = VADModelConfig.SampleRate;
        int numIter = numSamples / windowSize;
        var totalSecond = numSamples / (float)sampleRate;
        var res = new List<SenseVoiceRes>(500);
        using var VAD = new VoiceActivityDetector(VADModelConfig, bufferSizeInSeconds: 30);
        // Reuse one buffer instead of allocating a window per iteration.
        float[] buffer = new float[windowSize];
        for (int i = 0; i != numIter; ++i)
        {
            int start = i * windowSize;
            allSamples.Slice(start, windowSize).CopyTo(buffer);
            VAD.AcceptWaveform(buffer);
            // Drain finished speech segments as soon as speech is detected.
            if (VAD.IsSpeechDetected())
            {
                while (!VAD.IsEmpty())
                {
                    var p = ReadNext(VAD, res, totalSecond);
                    if (p != null) redisManager.SetTaskProgress(task, p + "%");
                }
            }
        }
        // Flush the tail samples and drain any remaining segments.
        VAD.Flush();
        while (!VAD.IsEmpty())
        {
            var p = ReadNext(VAD, res, totalSecond);
            if (p != null) redisManager.SetTaskProgress(task, p + "%");
        }
        // Persist results when a task id was supplied.
        if (!string.IsNullOrEmpty(task))
        {
            // Fire-and-forget by design: persistence failures are not surfaced
            // to the caller. NOTE(review): exceptions here are unobserved.
            _ = redisManager.AddTaskLog(task, "==>字幕数量" + res.Count);
            var captionsStr = res.ToJson();
            _ = serviceProvider.GetRequiredService<Repository<VideoTask>>()
                .AsUpdateable()
                .SetColumns(it => it.Captions == captionsStr)
                .Where(it => it.Id == long.Parse(task))
                .ExecuteCommandAsync();
            _ = redisManager.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
            //分析完成视频字幕后继续接收任务
            //redisManager.NewTask();
        }
        return res;
    }

    /// <summary>
    /// Pops the next VAD segment, recognizes it via <see cref="Callback"/>, and
    /// appends a caption when text was produced.
    /// </summary>
    /// <param name="VAD">Detector holding pending segments.</param>
    /// <param name="res">Caption list appended to on success.</param>
    /// <param name="totalSecond">Total audio length in seconds, for progress.</param>
    /// <returns>Progress percentage (0-100) when a caption was added; otherwise null.</returns>
    public double? ReadNext(VoiceActivityDetector VAD, List<SenseVoiceRes> res, float totalSecond)
    {
        var segment = VAD.Front();
        var sampleRate = VADModelConfig.SampleRate;
        var sampleRateF = (float)VADModelConfig.SampleRate;
        float startTime = segment.Start / sampleRateF;
        float duration = segment.Samples.Length / sampleRateF;
        using var stream = Callback(sampleRate, segment.Samples);
        double? resP = null;
        if (!string.IsNullOrEmpty(stream.Result.Text))
        {
            var text = stream.Result.Text.Trim();
            // Skip segments whose only output is a single full stop.
            if (text.Length == 1 && text == "。")
            {
                VAD.Pop();
                return resP;
            }
            res.Add(new()
            {
                Text = stream.Result.Text,
                Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero),
                End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero),
            });
            resP = Math.Round((double)(startTime + duration) / (totalSecond) * 100, 2);
        }
        VAD.Pop();
        return resP;
    }
}
}

View File

@ -51,7 +51,7 @@ namespace VideoAnalysisCore.Common.Expand
public static class AliyunOSSExpand
{
/// <summary>
/// 使用阿里云 vod拓展
/// 使用阿里云 OSS拓展
/// </summary>
/// <param name="service"></param>
/// <returns></returns>

View File

@ -221,6 +221,11 @@ namespace VideoAnalysisCore.Common.Expand
/// </summary>
public static class SSimpLetexExtensions
{
/// <summary>
/// 公式图片识别
/// </summary>
/// <param name="services"></param>
/// <returns></returns>
public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
{
services.AddSingleton<SimpLetexClient>();

View File

@ -19,6 +19,10 @@ namespace VideoAnalysisCore.Common.Expand
public static class SqlSugarExpand
{
public static bool ShowSQL = false;
/// <summary>
/// 数据库ORM拓展
/// </summary>
/// <param name="services"></param>
public static void AddSqlSugarExpand(this IServiceCollection services)
{

View File

@ -88,6 +88,10 @@ namespace VideoAnalysisCore.Common
Console.WriteLine($"{DateTime.Now}=>初始化 Redis任务队列");
service.AddSingleton<RedisInit>();
}
/// <summary>
/// redis连接拓展(包含消息队列任务)
/// </summary>
/// <param name="service"></param>
public static void AddRedisExpand(this IServiceCollection service)
{
Console.WriteLine($"{DateTime.Now}=>初始化 Redis");

View File

@ -35,6 +35,7 @@ namespace VideoAnalysisCore.Controllers
private readonly Repository<VideoTask> videoTaskDB;
private readonly Repository<CourseInfo> courseInfoDB;
private readonly Repository<VideoKonwPoint> videoKonwPointDB;
private readonly Repository<VideoTaskStage> videoTaskStageDB;
private readonly Repository<NodePackageInfo> nodePackageInfoDB;
private readonly Repository<VideoQuestion> videoQuestionDB;
private readonly Repository<VideoQuestionKonw> videoQuestionKonwDB;
@ -42,7 +43,7 @@ namespace VideoAnalysisCore.Controllers
public LJZK_Controller(IMapper mp, Repository<NodeSubscription> nodesubscriptionDB,
Repository<VideoTask> videoTaskDB = null, Repository<VideoKonwPoint> videoKonwPointDB = null
, Repository<NodePackageInfo> nodePackageInfoDB = null, Repository<VideoQuestion> videoQuestionDB = null, Repository<VideoQuestionKonw> videoQuestionKonwDB = null, Repository<CourseInfo> courseInfoDB = null, RedisManager redisManager = null)
, Repository<NodePackageInfo> nodePackageInfoDB = null, Repository<VideoQuestion> videoQuestionDB = null, Repository<VideoQuestionKonw> videoQuestionKonwDB = null, Repository<CourseInfo> courseInfoDB = null, RedisManager redisManager = null, Repository<VideoTaskStage> videoTaskStageDB = null)
{
this.mp = mp;
this.nodesubscriptionDB = nodesubscriptionDB;
@ -53,6 +54,7 @@ namespace VideoAnalysisCore.Controllers
this.videoQuestionKonwDB = videoQuestionKonwDB;
this.courseInfoDB = courseInfoDB;
this.redisManager = redisManager;
this.videoTaskStageDB = videoTaskStageDB;
}
@ -201,27 +203,49 @@ namespace VideoAnalysisCore.Controllers
.ToArrayAsync();
if (konwArr is null || konwArr.Length == 0)
return BadRequest("无有效任务分段");
var stageArr = await videoTaskStageDB.AsQueryable()
.Where(s => s.VideoTaskId == task.Id)
.ToArrayAsync();
var videoKnowDic = konwArr
.GroupBy(s => s.StageId)
.ToDictionary(s => s.Key);
var videoKnows = stageArr
.Select(s => new VideoKnowRes()
{
Content = s.Content,
StartTime = s.StartTime,
EndTime = s.EndTime,
Theme = s.Theme,
StageId = s.Id,
KnowPoint = videoKnowDic.ContainsKey(s.Id)
? string.Join(',', videoKnowDic[s.Id].Select(x => x.KnowPoint))
: string.Empty
}).ToArray();
var res = new TaskKnowRes()
{
TagId = task.TagId,
Status = task.LastEnum,
VideoTaskId = task.Id,
KnowBlockArr = konwArr
.GroupBy(s => s.StartTime)
KnowBlockArr = stageArr
.Select(s => new TaskKnowBlock()
{
Id = s.First().Id,
Content = s.First().Content,
StartTime = s.First().StartTime,
StageId = s.First().StageId,
EndTime = s.First().EndTime,
Theme = s.First().Theme,
Know = s.Select(x => new TaskKnowInfo()
{
Id = x.Id,
KnowPoint = x.KnowPoint,
KnowPointId = x.KnowPointId
}).ToArray()
Id = s.Id,
Content = s.Content,
StartTime = s.StartTime,
StageId = s.Id,
EndTime = s.EndTime,
Theme = s.Theme,
Know = videoKnowDic.ContainsKey(s.Id)
? videoKnowDic[s.Id]?.Select(x => new TaskKnowInfo()
{
Id = x.Id,
KnowPoint = x.KnowPoint,
KnowPointId = x.KnowPointId,
})?.ToArray()
: null
}).ToArray()
};
if (task.VideoType == AttachmentsInfoType.复习)

View File

@ -4,6 +4,7 @@ using MapsterMapper;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.DependencyInjection;
using SqlSugar;
using System;
using System.Diagnostics;
@ -32,15 +33,19 @@ namespace VideoAnalysisCore.Controllers
readonly Repository<VideoTask> baseService;
readonly Repository<VideoQuestion> videoQuestionDB;
readonly Repository<VideoKonwPoint> videoKonwPointDB;
readonly Repository<VideoTaskStage> videoTaskStageDB;
readonly Repository<VideoQuestionKonw> videoQuestionKonwDB;
readonly Repository<TaskLog> taskLogDB;
readonly RedisManager redisManager;
public readonly SenseVoice senseVoice;
public readonly FunASRNano funASRNano;
private readonly IMapper mp;
public VideoTaskController(Repository<VideoTask> baseService, RedisManager redisManager,
Repository<VideoQuestion> videoQuestionDB,
Repository<VideoQuestionKonw> videoQuestionKonwDB, Repository<VideoKonwPoint> videoKonwPointDB, SenseVoice senseVoice, IMapper mp, Repository<TaskLog> taskLogDB) : base(baseService)
Repository<VideoQuestionKonw> videoQuestionKonwDB, Repository<VideoKonwPoint> videoKonwPointDB, SenseVoice senseVoice, IMapper mp, Repository<TaskLog> taskLogDB, FunASRNano funASRNano, Repository<VideoTaskStage> videoTaskStageDB) : base(baseService)
{
this.baseService = baseService;
this.redisManager = redisManager;
@ -50,6 +55,8 @@ namespace VideoAnalysisCore.Controllers
this.senseVoice = senseVoice;
this.mp = mp;
this.taskLogDB = taskLogDB;
this.funASRNano = funASRNano;
this.videoTaskStageDB = videoTaskStageDB;
}
@ -129,7 +136,7 @@ namespace VideoAnalysisCore.Controllers
using HttpClient client = new HttpClient();
// 发送GET请求获取网络文件流
using var networkStream = await client.GetStreamAsync(url);
var res = await senseVoice.RunTask(networkStream);
var res = senseVoice.RunTask(networkStream);
return Ok(res);
}
catch (Exception ex)
@ -143,13 +150,34 @@ namespace VideoAnalysisCore.Controllers
/// <param name="file">文件流</param>
/// <returns></returns>
[HttpPost(Name = "AudioRecognition")]
public async Task<IActionResult> AudioRecognition(IFormFile file)
public IActionResult AudioRecognition(IFormFile file)
{
using var s = file.OpenReadStream();
var res = await senseVoice.RunTask(s);
return Ok(res);
senseVoice.RunTask(s);
return Ok();
}
/// <summary>
/// 语音识别
/// </summary>
/// <param name="file">文件流</param>
/// <returns></returns>
[HttpPost(Name = "AudioRecognition_test")]
public IActionResult AudioRecognition_test(IFormFile file)
{
using var s = file.OpenReadStream();
var x = AppCommon.Services.GetService<FunASRNano>();
x.Init();
senseVoice.RunTask(s);
for (int i = 0; i < SenseVoice.cachedValue.Count(); i++)
{
Console.WriteLine($"字幕索引=>{i}");
Console.WriteLine($"ssv=>{SenseVoice.cachedValue[i].z1}");
Console.WriteLine($"fun=>{SenseVoice.cachedValue[i].z2}");
Console.WriteLine();
}
return Ok();
}
/// <summary>
/// 获取FTS_Data str
@ -345,17 +373,23 @@ namespace VideoAnalysisCore.Controllers
var konwArr = await videoKonwPointDB.AsQueryable()
.Where(s => s.VideoTaskId == nowTask.Id)
.ToArrayAsync();
var videoKnows = konwArr
.GroupBy(s => s.StartTime)
var stageArr = await videoTaskStageDB.AsQueryable()
.Where(s => s.VideoTaskId == nowTask.Id)
.ToArrayAsync();
var videoKnowDic = konwArr
.GroupBy(s => s.StageId)
.ToDictionary(s => s.Key);
var videoKnows = stageArr
.Select(s => new VideoKnowRes()
{
Content = s.First().Content,
StartTime = s.First().StartTime,
EndTime = s.First().EndTime,
Theme = s.First().Theme,
StageId = s.First().StageId,
KnowPoint = string.Join(',', s.Select(x => x.KnowPoint))
Content = s.Content,
StartTime = s.StartTime,
EndTime = s.EndTime,
Theme = s.Theme,
StageId = s.Id,
KnowPoint = videoKnowDic.ContainsKey(s.Id)
? string.Join(',', videoKnowDic[s.Id].Select(x => x.KnowPoint))
: string.Empty
}).ToArray();
if (nowTask.VideoType == AttachmentsInfoType.复习)
{

View File

@ -65,9 +65,18 @@ namespace VideoAnalysisCore.Model
/// </summary>
public string? KnowPointId { get; set; }
/// <summary>
/// 内容总结
/// 知识点占比权重
/// </summary>
public string? Content { get; set; }
public float? KnowPointWeight { get; set; }
/// <summary>
/// 知识点来源
/// </summary>
public string KnowSource { get; set; }
/// <summary>
/// 内容总结[不写入数据库]
/// </summary>
[SugarColumn(IsIgnore = true)]
public virtual string? Content { get; set; }
/// <summary>
/// 课程阶段
/// </summary>
@ -79,5 +88,11 @@ namespace VideoAnalysisCore.Model
/// </summary>
[SugarColumn(IsNullable = true)]
public long? CloudSchoolId { get; set; }
/// <summary>
/// 教材来源
/// <para> 课本/试卷/挹青苑 ...</para>
/// </summary>
[SugarColumn(IsIgnore = true)]
public virtual string? TextbookSource { get; set; }
}
}

View File

@ -0,0 +1,78 @@
using SqlSugar;
using System.ComponentModel.DataAnnotations;
using System.Net;
using System.Text.Json;
using UserCenter.Model.Enum;
using VideoAnalysisCore.AICore.GPT.Dto;
using VideoAnalysisCore.AICore.SherpaOnnx;
using VideoAnalysisCore.Model.Enum;
using VideoAnalysisCore.Model.Interface;
using Whisper.net;
namespace VideoAnalysisCore.Model
{
/// <summary>
/// 视频片段
/// </summary>
[SugarTable("videotaskstage")]
public class VideoTaskStage : IDB
{
/// <summary>
/// id
/// </summary>
[SugarColumn(IsPrimaryKey = true)]
public long Id { get; set; }
/// <summary>
/// 视频任务id
/// <see cref="VideoTask.Id"/>
/// </summary>
public long VideoTaskId { get; set; }
/// <summary>
/// 自定义Id [任务视频自定义id]
/// <see cref="VideoTask.TagId"/>
/// </summary>
[SugarColumn(Length = 500, IsNullable = true)]
public string? TagId { get; set; }
/// <summary>
/// 开始时间
/// </summary>
[SugarColumn( IsNullable = true)]
public float? StartTime { get; set; }
/// <summary>
/// 结束时间
/// </summary>
[SugarColumn(IsNullable = true)]
public float? EndTime { get; set; }
/// <summary>
/// 持续时间
/// </summary>
[SugarColumn(IsIgnore = true)]
public float? KeepTime => (EndTime ?? 0) - StartTime ?? 0;
/// <summary>
/// 主题
/// </summary>
public string? Theme { get; set; }
/// <summary>
/// 知识点来源 视频秒,来源原因
/// </summary>
public string Content { get; set; }
/// <summary>
/// 课程阶段
/// </summary>
[SugarColumn(IsIgnore = true)]
public virtual StageEnum? Stage { get; set; }
/// <summary>
/// 视频所属云校ID
/// <para><see cref="UserCenter.Model.CloudSchool"/> 用户中心的云校id</para>
/// </summary>
[SugarColumn(IsNullable = true)]
public long? CloudSchoolId { get; set; }
/// <summary>
/// 教材来源
/// <para> 课本/试卷/挹青苑 ...</para>
/// </summary>
[SugarColumn(IsNullable = true)]
public virtual string? TextbookSource { get; set; }
}
}

View File

@ -71,7 +71,7 @@
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.20" />
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.12.21" />
<PackageReference Include="SixLabors.ImageSharp" Version="3.1.7" />
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
<PackageReference Include="SqlSugarCore" Version="5.1.4.205" />