From b1cbedb9e841834c5241cbe022ea5f90efb15e9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E8=82=A5=E7=BE=8A?= <1048382248@qq.com>
Date: Mon, 4 Nov 2024 17:13:56 +0800
Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20SenseVoice=20=E5=AD=97?=
 =?UTF-8?q?=E5=B9=95=E8=AF=86=E5=88=ABAI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 VideoAnalysis/appsettings.json                |   3 +-
 .../AICore/ChatGPT/KIMI/KIMI_GPT.cs           |   7 +-
 .../AICore/SherpaOnnx/SenseVoice.cs           | 100 +++++++++++++++++++
 VideoAnalysisCore/VideoAnalysisCore.csproj    |   2 +-
 4 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs

diff --git a/VideoAnalysis/appsettings.json b/VideoAnalysis/appsettings.json
index 1e3df8d..c84b063 100644
--- a/VideoAnalysis/appsettings.json
+++ b/VideoAnalysis/appsettings.json
@@ -19,7 +19,8 @@
   "ChatGpt": {
     "KIMI": {
       "Host": "https://api.moonshot.cn",
-      "ApiKey": "sk-CNYJdRHgJsgtgw1Q8GhQ5ayXuFPVLSk5bduOF4l2FMvI5lUo"
+      //"ApiKey": "sk-CNYJdRHgJsgtgw1Q8GhQ5ayXuFPVLSk5bduOF4l2FMvI5lUo"
+      "ApiKey": "sk-8BvvhESZIkgUbiaaJhglPxFa4o2X9H3xEv9lXELrWWwGxHWY"
     }
   },
   "DB": {
diff --git a/VideoAnalysisCore/AICore/ChatGPT/KIMI/KIMI_GPT.cs b/VideoAnalysisCore/AICore/ChatGPT/KIMI/KIMI_GPT.cs
index 6239b65..ce7d493 100644
--- a/VideoAnalysisCore/AICore/ChatGPT/KIMI/KIMI_GPT.cs
+++ b/VideoAnalysisCore/AICore/ChatGPT/KIMI/KIMI_GPT.cs
@@ -67,7 +67,7 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
                 criteriaBuilder.Append("|");
             }
 
-            var resFormat = "问题编号:int,结果:array|bool,问题解释:string";
+            var resFormat = "[{问题编号:int,结果:array|bool,问题解释:string}]";
 
             var postMessages = $"以下是一段音频的字幕,分析这段字幕(格式 说话人:开始秒:结束秒:内容|下一段字幕)." +
                 $"来简明的回答提出的问题 问题列表 {criteriaBuilder} " +
@@ -80,15 +80,14 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
             var modelId = reqTokenCount > 32 * 1000 ? "moonshot-v1-128k" : "moonshot-v1-32k";
             var chatRep = new ChatReq
             {
-                max_tokens =1000 * 31,
+                max_tokens = reqTokenCount * 2,
                 temperature = 0.3,
                 frequency_penalty = 0,
                 presence_penalty = 0,
                 model = modelId,
                 messages = new List(){
                     new MessagesItem(postMessages,"system"),
-                    //todo 规定返回json格式
-                    //new MessagesItem(postMessages,"assistant"),
+                    new MessagesItem(postMessages,"assistant"),
                 }
             };
             var chatResp = await moonshotClient.Chat(chatRep);
diff --git a/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs
new file mode 100644
index 0000000..8067b92
--- /dev/null
+++ b/VideoAnalysisCore/AICore/SherpaOnnx/SenseVoice.cs
@@ -0,0 +1,100 @@
+using Microsoft.Extensions.Options;
+using SherpaOnnx;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using VideoAnalysisCore.Common;
+
+namespace VideoAnalysisCore.AICore.SherpaOnnx
+{
+    /// <summary>
+    /// Offline speech recognition (subtitle extraction) backed by a sherpa-onnx SenseVoice model.
+    /// <see cref="Init"/> must run before <see cref="RunTask"/>.
+    /// </summary>
+    public class SenseVoice
+    {
+        // Folder under AppCommon.AIModelFile that holds tokens.txt and model.onnx.
+        const string ModelFolder = "sherpa-onnx-sense-voice-24-07-17";
+
+        // Shared recognizer; null until Init has been called.
+        static OfflineRecognizer OR = default!;
+
+        /// <summary>
+        /// Initializes SenseVoice: builds the shared recognizer from the model files.
+        /// </summary>
+        /// <param name="speakerNumber">Not read by this method; kept so existing call sites keep compiling.</param>
+        /// <param name="threshold">Not read by this method; kept so existing call sites keep compiling.</param>
+        public static void Init(int speakerNumber = 0, double threshold = 0.6)
+        {
+            OfflineRecognizerConfig config = new OfflineRecognizerConfig();
+            // Sample rate
+            config.FeatConfig.SampleRate = 16000;
+            // Feature dimension the model was trained with
+            config.FeatConfig.FeatureDim = 80;
+            // Path to tokens.txt
+            config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, ModelFolder, "tokens.txt");
+            // SenseVoice model
+            config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, ModelFolder, "model.onnx");
+            // 1 = use inverse text normalization for SenseVoice
+            config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
+            // Model type
+            config.ModelConfig.ModelType = string.Empty;
+
+            // Decoding method: "greedy_search" or "modified_beam_search".
+            // For modified_beam_search also set config.MaxActivePaths (e.g. 4), the number of
+            // active paths kept during the search; it is only used by that method.
+            config.DecodingMethod = "greedy_search";
+
+            // Hotwords file
+            config.HotwordsFile = string.Empty;
+            // Hotwords score
+            config.HotwordsScore = 1.5f;
+            // Path to the inverse text normalization rule FSTs
+            config.RuleFsts = string.Empty;
+
+            config.ModelConfig.Debug = 0;
+
+            OR = new OfflineRecognizer(config);
+        }
+
+        /// <summary>
+        /// Gets the speech subtitles: decodes the task's WAV file and prints the text, tokens
+        /// and timestamps to the console.
+        /// </summary>
+        /// <param name="task">Task identifier; the audio file is "task.LocalPath()/{task}.wav".</param>
+        /// <returns>A task; all work runs synchronously before it is handed back.</returns>
+        /// <exception cref="ArgumentException"><paramref name="task"/> is null or empty.</exception>
+        /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
+        /// <exception cref="FileNotFoundException">The WAV file does not exist.</exception>
+        public static async Task RunTask(string task)
+        {
+            if (string.IsNullOrEmpty(task))
+                throw new ArgumentException("task must not be null or empty", nameof(task));
+            if (OR is null)
+                throw new InvalidOperationException("SenseVoice.Init must be called before RunTask");
+
+            var filePath = Path.Combine(task.LocalPath(), task + ".wav");
+            if (!File.Exists(filePath))
+                throw new FileNotFoundException("task 音频路径未找到", filePath);
+
+            // OfflineStream is IDisposable in sherpa-onnx; dispose it once decoding is finished.
+            using OfflineStream stream = OR.CreateStream();
+            WaveReader waveReader = new WaveReader(filePath);
+            stream.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);
+            OR.Decode(stream);
+
+            var r = stream.Result;
+            Console.WriteLine("--------------------");
+            Console.WriteLine("Text: {0}", r.Text);
+            Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
+            if (r.Timestamps != null && r.Timestamps.Length > 0)
+            {
+                var formatted = r.Timestamps.Select(t => t.ToString("0.00"));
+                Console.WriteLine("Timestamps: [{0}]", string.Join(", ", formatted));
+            }
+            await Task.CompletedTask;
+        }
+    }
+}
diff --git a/VideoAnalysisCore/VideoAnalysisCore.csproj b/VideoAnalysisCore/VideoAnalysisCore.csproj
index af346ad..80f3bd3 100644
--- a/VideoAnalysisCore/VideoAnalysisCore.csproj
+++ b/VideoAnalysisCore/VideoAnalysisCore.csproj
@@ -49,7 +49,7 @@
-
+