From c14842a99d21e4fcc5710eb73d3994cc84d8791d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E8=82=A5=E7=BE=8A?= <1048382248@qq.com> Date: Thu, 23 Jan 2025 10:34:19 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=20=E5=AE=8C=E5=96=84gpt?= =?UTF-8?q?=E8=AF=B7=E6=B1=82=E5=8F=82=E6=95=B0,gpt=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Components/Pages/VideoTaskShow.razor | 3 +- .../Components/Pages/VideoTaskShow.razor.cs | 9 ++-- .../AICore/GPT/ChatGPT/ChatGPTModel.cs | 41 +++++++++++++++-- .../AICore/GPT/ChatGPT/ChatGPTType.cs | 44 +++++++++++++++++++ .../AICore/GPT/ChatGPT/Chat_GPT.cs | 34 +++++++++----- .../AICore/GPT/Dto/QuestionRes.cs | 6 +-- .../AICore/SherpaOnnx/SenseVoice.cs | 42 +++++++++++++----- VideoAnalysisCore/Common/AppCommon.cs | 29 ++++++++++++ VideoAnalysisCore/VideoAnalysisCore.csproj | 26 +++++++++++ 9 files changed, 199 insertions(+), 35 deletions(-) create mode 100644 VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTType.cs diff --git a/VideoAnalysis/Components/Pages/VideoTaskShow.razor b/VideoAnalysis/Components/Pages/VideoTaskShow.razor index 3626de3..6b722bf 100644 --- a/VideoAnalysis/Components/Pages/VideoTaskShow.razor +++ b/VideoAnalysis/Components/Pages/VideoTaskShow.razor @@ -46,8 +46,7 @@ } }); //时间片 - let segment = displayButton.find(s => currentTime >= s.startTime - && currentTime <= s.endTime) + let segment = displayButton.findLast(s => currentTime >= s.startTime) if (segment) { segment.button.style.backgroundColor = "rgb(238, 200, 118)"; if (lastSegments && lastSegments != segment) lastSegments.button.style.backgroundColor = "rgb(240, 249, 235)"; diff --git a/VideoAnalysis/Components/Pages/VideoTaskShow.razor.cs b/VideoAnalysis/Components/Pages/VideoTaskShow.razor.cs index 318a073..3b4ce39 100644 --- a/VideoAnalysis/Components/Pages/VideoTaskShow.razor.cs +++ b/VideoAnalysis/Components/Pages/VideoTaskShow.razor.cs @@ -59,10 +59,11 @@ namespace Learn.VideoAnalysis.Components.Pages { var sf = ((int)((segment.StartTime ?? 0) / 60)).ToString().PadLeft(2,'0'); var sm = ((int)((segment.StartTime ?? 0) % 60)).ToString().PadLeft(2, '0'); - var ef = ((int)((segment.EndTime ?? 0) / 60)).ToString().PadLeft(2, '0'); - var em = ((int)((segment.EndTime ?? 0) % 60)).ToString().PadLeft(2, '0'); - return $"{sf}:{sm} - {ef}: {em}"; - } + return $"{sf}:{sm}"; + //var ef = ((int)((segment.EndTime ?? 0) / 60)).ToString().PadLeft(2, '0'); + //var em = ((int)((segment.EndTime ?? 0) % 60)).ToString().PadLeft(2, '0'); + //return $"{sf}:{sm} - {ef}: {em}"; + } /// /// 初始化 /// diff --git a/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTModel.cs b/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTModel.cs index c4f9b6d..0e65958 100644 --- a/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTModel.cs +++ b/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTModel.cs @@ -15,10 +15,45 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT /// 对话 /// public Message[] messages { get; set; } - public string model { get; set; } = "gpt-4o"; + /// + /// 提问种子值[用来确保 相同参数请求尽可能返回相同参数] + /// 默认:null + /// 此功能处于 Beta 阶段。 如果指定,我们的系统将尽最大努力确定性地采样,这样具有相同 and 参数的重复请求应该返回相同的结果。 无法保证确定性,您应该参考 response 参数来监控后端的变化 + /// + public int? seed { get; set; } =null; + public string model { get; set; } = ChatGPTType.GPT4o; + /// + /// 要使用的采样温度,介于 0 和 2 之间。较高的值(如 0.8)将使输出更加随机,而较低的值(如 0.2)将使其更加集中和确定。 我们通常建议更改此项或同时更改两者。top_p + /// 默认为 1 + /// 联动 + /// public float temperature { get; set; } = 0.2f; - public float max_tokens { get; set; } = 4000; - public object response_format = new { type = "json_object" }; // 指定结构化输出格式 + /// + /// 一种替代温度采样的方法,称为原子核采样, 其中,模型考虑具有top_p概率的标记的结果 质量。所以 0.1 表示仅包含前 10% 概率质量的代币 被考 + /// 建议与联动 + /// + public float top_p { get; set; } = 0.5f; + public float max_completion_tokens { get; set; } = 5000; + /// + /// 一个对象,用于指定模型必须输出的格式。设置为 enable 结构化输出,确保模型与您提供的 JSON 匹配 图式。 + /// + public object response_format = new { type = "json_object" }; + /// + /// 流式返回 + /// + public bool stream =false; + /// + /// 您希望模型为此请求生成的 Output types。 大多数模型都能够生成文本,这是 + /// 默认设置: ["text"] + /// 该模型还可用于生成音频。自 请求此模型同时生成文本和音频响应,您可以 用:gpt-4o-audio-preview["text", "audio"] + /// + public string modalities = "[\"json\"]"; + /// + /// ai引导新话题 + /// 默认-2 范围[-2~2] + /// + public int presence_penalty = -2; + } public class Message { diff --git a/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTType.cs b/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTType.cs new file mode 100644 index 0000000..9aba558 --- /dev/null +++ b/VideoAnalysisCore/AICore/GPT/ChatGPT/ChatGPTType.cs @@ -0,0 +1,44 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace VideoAnalysisCore.AICore.GPT.ChatGPT +{ + public class ChatGPTType + { + public static string GPT4oLatest = "chatgpt-4o-latest"; + public static string GPT4o241120 = "gpt-4o-2024-11-20"; + public static string GPT4o240513 = "gpt-4o-2024-05-13"; + /// + /// GPT-4O 型 -> gpt-4o-2024-08-06 + /// + public static string GPT4o= "gpt-4o"; + + public static string GPT4oMini= "gpt-4o-mini"; + public static string GPT4oMini240718 = "gpt-4o-mini-2024-07-18"; + + public static string GPT4Turbo= "gpt-4-turbo-2024-04-09"; + /// + /// gpt-4-turbo-preview gpt-4-0125-preview + /// + public static string GPT4TurboPreview = "gpt-4-turbo-preview"; + + + /// + /// o1 系列模型通过强化学习进行训练,以执行复杂的推理。o1 模型在回答之前会思考,在回应用户之前会产生一个漫长的内部思维链。在我们的推理指南中了解 o1 模型的功能。 + ///目前有两种型号可供选择: + ///O1:旨在解决跨领域的难题的推理模型 + ///O1-Mini:用于专业任务的快速且经济实惠的推理模型 + /// + public static string GPTo1 = "o1"; + /// + /// o1 系列模型通过强化学习进行训练,以执行复杂的推理。o1 模型在回答之前会思考,在回应用户之前会产生一个漫长的内部思维链。在我们的推理指南中了解 o1 模型的功能。 + ///目前有两种型号可供选择: + ///O1:旨在解决跨领域的难题的推理模型 + ///O1-Mini:用于专业任务的快速且经济实惠的推理模型 + /// + public static string GPTo1Mini = "o1-mini"; + } +} diff --git a/VideoAnalysisCore/AICore/GPT/ChatGPT/Chat_GPT.cs b/VideoAnalysisCore/AICore/GPT/ChatGPT/Chat_GPT.cs index 8e786e8..04b60d6 100644 --- a/VideoAnalysisCore/AICore/GPT/ChatGPT/Chat_GPT.cs +++ b/VideoAnalysisCore/AICore/GPT/ChatGPT/Chat_GPT.cs @@ -13,7 +13,7 @@ using System.Threading.Tasks; namespace VideoAnalysisCore.AICore.GPT.ChatGPT { /// - /// kimi 文本模型 + /// chatgpt 文本模型 /// public class Chat_GPT : IBserGPT { @@ -48,7 +48,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT // 如果类型相同,则扩展时间段 if (current.Theme == next.Theme) { - current.EndTime = Math.Max(current.EndTime.Value, next.EndTime.Value); + //current.EndTime = Math.Max(current.EndTime.Value, next.EndTime.Value); current.Content += next.Content; } else @@ -90,24 +90,26 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT var fileNameInfoRes = await ChatAsync(task, fileNamePostMessages,null, fileNameResFormat); var captions = ExpandFunction.GetSpeakerCaptions(task); + var maxVideoTime = captions?.TimeBase?.LastOrDefault()?.End??0; var criteriaBuilder = new StringBuilder(); - var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Section":章节(string),"Theme":主题(string),"ThemeDetalis":主题详情(string),"Content":内容总结(string)}]"""; + //var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Section":章节(string),"Theme":主题(string),"ThemeDetalis":主题详情(string),"Content":内容总结(string)}]"""; + var resFormat = """[{"StartTime":开始秒(number),"Section":章节(string),"Theme":主题(string),"ThemeDetalis":主题详情(string),"Content":内容总结(string)}]"""; var know = await knowledgeInfoDB.GetFirstAsync(s => s.Name == fileNameInfoRes.授课章节); var knowledgeInfos = await knowledgeInfoDB.AsQueryable().ToChildListAsync(s => s.Parent_Id, know.Id); var knows = "数列的概念,数列的定义,项的表示,数列的表示方法,通项公式,递推公式,图像表示,数列的类型,等差数列,等比数列,其他特殊数列,数列的性质,单调性,有限性,数列的求和,等差数列求和公式,等比数列求和公式,数列极限,递推关系"; knows = string.Join(',', knowledgeInfos.Select(s => s.Name)); var postMessages = - $"你的任务是分析视频字幕内容并提取出中国高考考试试题方法点,然后根据步骤分析出内容片段" + + $"你的任务是分析视频字幕内容并提取出中国高考考试试题方法点,然后根据步骤分析出知识片段" + $"按以下步骤完成:" + $"1.识别方法点:提取字幕内容中与{subject}考试属于{fileNameInfoRes.授课章节}章节相关的方法点。" + $"2.分析总结:基于提取出的方法点名称来匹配我提供的方法点名称" + $"提供的方法点名称(基本概念,课堂练习,例题讲解,{knows})。" + - $"3.关联合并相似的知识点来合并为内容片段。" + - $"内容片段使用关联知识点中的最小(开始秒)和(最大)结束秒,主题为关联知识点的主题分析,内容总结为关联知识点的内容总结分析。" + - $"4.基于内容片段的内容总结加上主题来分析这个片段对主题的讲解内容为新的主题 例(数列的基本概念)。" + - $"5.分配空余未使用的时间段到内容相近的内容片段时间区间来获取更加详细的上下文,但是请避免内容片段之间时间重合。" + + $"3.关联合并知识内容相似的知识点来合并为知识片段。" + + $"知识片段使用关联知识点中的最小开始时间主题为关联知识点的主题分析,内容总结为关联知识点的内容总结分析。" + + $"4.基于'知识片段'的'内容总结'加上'主题'来分析这个片段对主题的讲解内容为新的主题 例如(数列的基本概念)。" + + $"5.分配空余未使用的时间段到内容相近的知识片段时间区间来获取更加详细的上下文,但是请避免知识片段之间时间重合。" + $"输入:包含时间戳的视频字幕文本。" + $"以下是包含时间的视频字幕文本。" + $"字幕格式(说话人:开始秒:结束秒:内容|下一段字幕).字幕列表 {captions.Captions}" + @@ -136,6 +138,15 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT if (questionRes.Count(s=>s.ThemeDetalis == questionRes.First().ThemeDetalis) >= 3) throw new Exception("视频分段主题重复 =>" + questionRes.First().ThemeDetalis); + for (int i = 0; i < questionRes.Length; i++) + { + var item = questionRes[i]; + if (i == questionRes.Length - 1) + item.EndTime = maxVideoTime; + else + item.EndTime = (int)(questionRes[i + 1]?.StartTime??0) - 1; + } + await RedisExpand.Redis .HMSetAsync(RedisExpandKey.Task(task), "VideoKnows", questionRes); @@ -143,7 +154,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT // $"你的任务是分析json内容并合并含义相似的主题为新的主题" + // $"按以下步骤完成:" + // $"1.合理合并主题字段重复相似的对象为新的json对象,确保内容的连贯性和逻辑性。" + - // $"2.合并对象属性持续时间低于60秒的对象" + + // $"2.合并对象属性持续时间低于60秒的对象" + // $"3.结构化输出。" + // $"输入:json对象 包含总结开始秒,结束秒,持续时间,主题,章节,内容总结" + // $"以下是包含json内容的文本。" + @@ -171,7 +182,8 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT messageArr = messageArr.Where(s => s != null).ToArray(); var chatRep = new ChatRequest { - max_tokens = maxTokens, + model = ChatGPTType.GPT4o241120, + max_completion_tokens = maxTokens, temperature = 0.2f, messages = messageArr }; @@ -268,7 +280,7 @@ namespace VideoAnalysisCore.AICore.GPT.ChatGPT var maxTokens = 4000; var chatRep = new ChatRequest { - max_tokens = maxTokens, + max_completion_tokens = maxTokens, temperature = 0.3f, messages = [ new Message(postMessages,"system"), diff --git a/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs b/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs index 5a6854b..70e1349 100644 --- a/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs +++ b/VideoAnalysisCore/AICore/GPT/Dto/QuestionRes.cs @@ -17,9 +17,9 @@ namespace VideoAnalysisCore.AICore.GPT.Dto /// public float? StartTime { get; set; } public float? EndTime { get; set; } - /// - /// 持续时间 - /// + ///// + ///// 持续时间 + ///// public float? KeepTime => (EndTime ?? 0) - StartTime ?? 0; /// /// 主题 diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs index 0c4dfef..7ca92a5 100644 --- a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs +++ b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs @@ -3,15 +3,19 @@ using SherpaOnnx; using System; using System.Collections.Generic; using System.Diagnostics; +using System.IO; using System.Linq; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using VideoAnalysisCore.Common; +using static System.Runtime.InteropServices.JavaScript.JSType; namespace VideoAnalysisCore.AICore.SherpaOnnx { public class SenseVoice { + const string TransducerStr = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"; static OfflineRecognizer OR =default!; //static VoiceActivityDetector VAD = default!; static VadModelConfig VADModelConfig = default!; @@ -20,7 +24,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx /// /// /// 是否使用gpu 报错请看安装CUDA环境 - public static void Init(int numThreads =4,bool useGPU=false) + public static void Init(int numThreads =4,bool useGPU=false,bool useHotwords = false) { Console.WriteLine("初始化 SenseVoice"); OfflineRecognizerConfig config = new OfflineRecognizerConfig(); @@ -53,11 +57,25 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx ////它指定搜索过程中要保留的活动路径数 //config.MaxActivePaths =4; #endregion + //启用热词功能 + if (false) + { + //热词目录 + config.HotwordsFile = Path.Combine(AppCommon.AIModelFile, "Hotwords.txt"); + config.DecodingMethod = "modified_beam_search"; + //热词得分 + config.HotwordsScore = 1.5f; - //热词目录 - config.HotwordsFile = string.Empty; - //热词得分 - config.HotwordsScore =1.5f ; + config.ModelConfig.ModelingUnit = "cjkchar+bpe"; + config.ModelConfig.BpeVocab = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "bpe.model"); + config.ModelConfig.Transducer = new OfflineTransducerModelConfig() + { + Decoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "decoder-epoch-99-avg-1.onnx"), + Encoder = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "encoder-epoch-99-avg-1.onnx"), + Joiner = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", "joiner-epoch-99-avg-1.onnx"), + }; + + } //反转文本规范化规则 fst 的路径 config.RuleFsts = string.Empty; @@ -110,14 +128,14 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx SpeechSegment segment = VAD.Front(); float startTime = segment.Start / (float)sampleRate; float duration = segment.Samples.Length / (float)sampleRate; - OfflineStream stream = OR.CreateStream(); + using OfflineStream stream = OR.CreateStream(); stream.AcceptWaveform(sampleRate, segment.Samples); OR.Decode(stream); if (!string.IsNullOrEmpty(stream.Result.Text)) { res.Add(new() { - Text = stream.Result.Text, + Text = ExpandFunction.HandleFormula(stream.Result.Text), Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero), End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero), }); @@ -194,10 +212,10 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx OR.Decode(stream); if (!string.IsNullOrEmpty(stream.Result.Text)) { - res.Add(new() - { - Text = stream.Result.Text, - Start= (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero), + res.Add(new() + { + Text = ExpandFunction.HandleFormula(stream.Result.Text), + Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero), End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero), }); var progress = (float)(startTime + duration) / (totalSecond) * 100; @@ -223,7 +241,7 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx { res.Add(new() { - Text = stream.Result.Text, + Text = ExpandFunction.HandleFormula(stream.Result.Text), Start = (float)Math.Round(startTime, 2, MidpointRounding.AwayFromZero), End = (float)Math.Round(startTime + duration, 2, MidpointRounding.AwayFromZero), }); diff --git a/VideoAnalysisCore/Common/AppCommon.cs b/VideoAnalysisCore/Common/AppCommon.cs index 0b7de74..b53ffc6 100644 --- a/VideoAnalysisCore/Common/AppCommon.cs +++ b/VideoAnalysisCore/Common/AppCommon.cs @@ -4,11 +4,13 @@ using FreeRedis; using Microsoft.Extensions.DependencyModel; using SqlSugar; using SqlSugar.IOC; +using System.Collections; using System.Collections.Generic; using System.Linq; using System.Reflection; using System.Runtime.Loader; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; using UserCenter.Model.Interface; using VideoAnalysisCore.AICore.SherpaOnnx; @@ -16,6 +18,7 @@ using VideoAnalysisCore.Enum; using VideoAnalysisCore.Interface; using VideoAnalysisCore.Model.Dto; using Whisper.net; +using static System.Runtime.InteropServices.JavaScript.JSType; namespace VideoAnalysisCore.Common { @@ -87,6 +90,32 @@ namespace VideoAnalysisCore.Common /// public static class ExpandFunction { + static Dictionary FormulaData = new Dictionary() + { + { "阿尔法","α"}, + { "贝塔","β"}, + { "伽马","γ"}, + { "德尔塔","Δ"}, + { "派","π"}, + { "西格马","∑"}, + { "欧米伽","Ω"}, + { "普西","Ψ"}, + }; + static string FormulaDataKey = string.Join("|", FormulaData.Keys); + /// + /// 处理数学公式 + /// + /// + /// + public static string HandleFormula(string f) + { + if (string.IsNullOrEmpty(f)) + return f; + return Regex.Replace(f, FormulaDataKey, + match => + FormulaData[match.Value] + ); + } /// /// 转换 ant 查询枚举 到 sqlsuger枚举 /// diff --git a/VideoAnalysisCore/VideoAnalysisCore.csproj b/VideoAnalysisCore/VideoAnalysisCore.csproj index 0393a3a..70c82a8 100644 --- a/VideoAnalysisCore/VideoAnalysisCore.csproj +++ b/VideoAnalysisCore/VideoAnalysisCore.csproj @@ -8,8 +8,13 @@ + + + + + @@ -17,12 +22,27 @@ Never + + Always + Never Never + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + Never @@ -45,4 +65,10 @@ + + + + Never + +