Compare commits
No commits in common. "9e0dca0283972baca7ab8889a3a9b69e34db97a1" and "263041aa6a1f5cfd858269d1c28700b86284895b" have entirely different histories.
9e0dca0283
...
263041aa6a
|
|
@ -49,7 +49,6 @@ namespace Learn.VideoAnalysis
|
|||
//初始化 插件
|
||||
Speaker.Init();
|
||||
RedisExpand.Init();
|
||||
SenseVoice.Init();
|
||||
|
||||
|
||||
builder.Services.AddScoped(sp =>
|
||||
|
|
|
|||
|
|
@ -19,8 +19,7 @@
|
|||
"ChatGpt": {
|
||||
"KIMI": {
|
||||
"Host": "https://api.moonshot.cn",
|
||||
//"ApiKey": "sk-CNYJdRHgJsgtgw1Q8GhQ5ayXuFPVLSk5bduOF4l2FMvI5lUo"
|
||||
"ApiKey": "sk-8BvvhESZIkgUbiaaJhglPxFa4o2X9H3xEv9lXELrWWwGxHWY"
|
||||
"ApiKey": "sk-CNYJdRHgJsgtgw1Q8GhQ5ayXuFPVLSk5bduOF4l2FMvI5lUo"
|
||||
}
|
||||
},
|
||||
"DB": {
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
|
|||
criteriaBuilder.Append("|");
|
||||
}
|
||||
|
||||
var resFormat = "[{问题编号:int,结果:array|bool,问题解释:string}]";
|
||||
var resFormat = "问题编号:int,结果:array|bool,问题解释:string";
|
||||
var postMessages =
|
||||
$"以下是一段音频的字幕,分析这段字幕(格式 说话人:开始秒:结束秒:内容|下一段字幕)." +
|
||||
$"来简明的回答提出的问题 问题列表 {criteriaBuilder} " +
|
||||
|
|
@ -80,14 +80,15 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
|
|||
var modelId = reqTokenCount > 32 * 1000 ? "moonshot-v1-128k" : "moonshot-v1-32k";
|
||||
var chatRep = new ChatReq
|
||||
{
|
||||
max_tokens = reqTokenCount * 2,
|
||||
max_tokens =1000 * 31,
|
||||
temperature = 0.3,
|
||||
frequency_penalty = 0,
|
||||
presence_penalty = 0,
|
||||
model = modelId,
|
||||
messages = new List<MessagesItem>(){
|
||||
new MessagesItem(postMessages,"system"),
|
||||
new MessagesItem(postMessages,"assistant"),
|
||||
//todo 规定返回json格式
|
||||
//new MessagesItem(postMessages,"assistant"),
|
||||
}
|
||||
};
|
||||
var chatResp = await moonshotClient.Chat(chatRep);
|
||||
|
|
|
|||
|
|
@ -1,148 +0,0 @@
|
|||
using Microsoft.Extensions.Options;
|
||||
using SherpaOnnx;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using VideoAnalysisCore.AICore.Whisper;
|
||||
using VideoAnalysisCore.Common;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
||||
{
|
||||
/// <summary>
/// Speech-to-caption pipeline built on sherpa-onnx: a voice activity detector
/// splits a WAV file into speech segments and an offline SenseVoice recognizer
/// turns each segment into text.
/// </summary>
public class SenseVoice
{
    /// <summary>
    /// Name of the folder (under <c>AppCommon.AIModelFile</c>) that holds the
    /// tokens file, the SenseVoice model and the Silero VAD model.
    /// </summary>
    private const string ModelDirName = "sherpa-onnx-sense-voice-24-07-17";

    static OfflineRecognizer OR = default!;
    static VoiceActivityDetector VAD = default!;
    static VadModelConfig VADModelConfig = default!;

    /// <summary>
    /// Initializes the shared recognizer and voice activity detector.
    /// Must be called once before <see cref="RunTask"/>.
    /// </summary>
    /// <param name="speakerNumber">Not used by this method; kept for signature compatibility.</param>
    /// <param name="threshold">Not used by this method; kept for signature compatibility.</param>
    public static void Init(int speakerNumber = 0, double threshold = 0.6)
    {
        Console.WriteLine("初始化 SenseVoice");

        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        // Sample rate of the audio fed to the recognizer.
        config.FeatConfig.SampleRate = 16000;
        // Feature dimension the model was trained with.
        config.FeatConfig.FeatureDim = 80;
        // Path to tokens.txt.
        config.ModelConfig.Tokens = Path.Combine(AppCommon.AIModelFile, ModelDirName, "tokens.txt");
        // SenseVoice model file.
        config.ModelConfig.SenseVoice.Model = Path.Combine(AppCommon.AIModelFile, ModelDirName, "model.onnx");
        // 1 = apply inverse text normalization to the recognized text.
        config.ModelConfig.SenseVoice.UseInverseTextNormalization = 1;
        // Model type is left empty.
        config.ModelConfig.ModelType = string.Empty;

        // Decoding method: "greedy_search" or "modified_beam_search".
        // Greedy search is used here; MaxActivePaths only matters for
        // modified_beam_search and is therefore left untouched.
        config.DecodingMethod = "greedy_search";

        // No hotwords file is configured.
        config.HotwordsFile = string.Empty;
        // Hotword score.
        config.HotwordsScore = 1.5f;
        // No inverse-text-normalization rule FSTs are configured.
        config.RuleFsts = string.Empty;

        config.ModelConfig.Debug = 0;

        OR = new OfflineRecognizer(config);

        VADModelConfig = new VadModelConfig();
        VADModelConfig.SileroVad.Model = Path.Combine(AppCommon.AIModelFile, ModelDirName, "silero_VAD.onnx");
        VADModelConfig.Debug = 0;
        // Second argument is the detector's buffer size.
        VAD = new VoiceActivityDetector(VADModelConfig, 60);
    }

    /// <summary>
    /// Recognizes the captions of the task's WAV file, stores them in Redis under
    /// the task's hash (field "Captions") and queues the task for speaker parsing.
    /// </summary>
    /// <param name="task">Task identifier; the audio is read from <c>{task.LocalPath()}/{task}.wav</c>.</param>
    /// <returns>A task that completes once the captions are stored and the next stage is queued.</returns>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called.</exception>
    /// <exception cref="FileNotFoundException">The task's WAV file does not exist.</exception>
    public static async Task RunTask(string task)
    {
        if (OR is null || VAD is null)
        {
            throw new InvalidOperationException("SenseVoice.Init must be called before RunTask.");
        }

        var filePath = Path.Combine(task.LocalPath(), task + ".wav");
        // File.Exists already returns false for a null or empty path.
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        WaveReader reader = new WaveReader(filePath);

        int windowSize = VADModelConfig.SileroVad.WindowSize;
        int sampleRate = VADModelConfig.SampleRate;
        // Only whole windows are fed to the detector; a trailing partial window is dropped.
        int numIter = reader.Samples.Length / windowSize;

        // NOTE(review): VAD is a shared static instance. This assumes RunTask is not
        // executed concurrently and that segment offsets do not carry over between
        // calls - confirm against the sherpa-onnx VoiceActivityDetector behaviour.
        var res = new List<SenseVoiceRes>(500);
        for (int i = 0; i != numIter; ++i)
        {
            float[] samples = new float[windowSize];
            Array.Copy(reader.Samples, i * windowSize, samples, 0, windowSize);
            VAD.AcceptWaveform(samples);

            // Segments are only drained while the detector reports speech.
            if (VAD.IsSpeechDetected())
            {
                DrainSegments(res, sampleRate);
            }
        }

        // Flush the detector and collect whatever segments it still holds.
        VAD.Flush();
        DrainSegments(res, sampleRate);

        await RedisExpand.Redis.HMSetAsync(RedisExpandKey.Task(task), "Captions", res);
        RedisExpand.InsertChannel(Enum.RedisChannelEnum.ParsingSpeaker, task);
    }

    /// <summary>
    /// Decodes every segment currently queued in the detector and appends the
    /// non-empty recognition results to <paramref name="captions"/>.
    /// </summary>
    /// <param name="captions">List that receives the recognized segments.</param>
    /// <param name="sampleRate">Sample rate used to convert sample counts to seconds.</param>
    private static void DrainSegments(List<SenseVoiceRes> captions, int sampleRate)
    {
        while (!VAD.IsEmpty())
        {
            SpeechSegment segment = VAD.Front();
            float startTime = segment.Start / (float)sampleRate;
            float duration = segment.Samples.Length / (float)sampleRate;

            // Dispose the per-segment stream as soon as its result has been read.
            using (OfflineStream stream = OR.CreateStream())
            {
                stream.AcceptWaveform(sampleRate, segment.Samples);
                OR.Decode(stream);

                string text = stream.Result.Text;
                if (!string.IsNullOrEmpty(text))
                {
                    captions.Add(new SenseVoiceRes
                    {
                        Text = text,
                        Start = startTime,
                        End = startTime + duration
                    });
                }
            }

            VAD.Pop();
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
using Whisper.net;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.Whisper
|
||||
{
|
||||
/// <summary>
/// Result of caption recognition: one piece of recognized text together with
/// its start and end time.
/// </summary>
public class SenseVoiceRes
{
    /// <summary>
    /// Creates an empty result; the properties are filled in by the caller.
    /// </summary>
    public SenseVoiceRes() { }

    /// <summary>
    /// Recognized text. Defaults to the empty string.
    /// </summary>
    public string Text { get; set; } = String.Empty;

    /// <summary>
    /// Start time of the text.
    /// </summary>
    public float Start { get; set; }

    /// <summary>
    /// End time of the text.
    /// </summary>
    public float End { get; set; }
}
|
||||
}
|
||||
|
|
@ -22,7 +22,6 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
/// <param name="threshold"></param>
|
||||
public static void Init(int speakerNumber = 0, double threshold = 0.6)
|
||||
{
|
||||
Console.WriteLine("初始化 Speaker");
|
||||
var config = new OfflineSpeakerDiarizationConfig();
|
||||
//Pyannote模型地址
|
||||
config.Segmentation.Pyannote.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-pyannote-segmentation-3-0", "model.onnx");
|
||||
|
|
@ -73,15 +72,15 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
/// 总持续时间
|
||||
/// </summary>
|
||||
[JsonIgnore]
|
||||
public float Total => End - Start;
|
||||
public decimal Total => End - Start;
|
||||
/// <summary>
|
||||
/// 开始时间
|
||||
/// </summary>
|
||||
public float Start { get; set; }
|
||||
public decimal Start { get; set; }
|
||||
/// <summary>
|
||||
/// 结束时间
|
||||
/// </summary>
|
||||
public float End { get; set; }
|
||||
public decimal End { get; set; }
|
||||
/// <summary>
|
||||
/// 讲话人索引
|
||||
/// </summary>
|
||||
|
|
@ -100,8 +99,8 @@ namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|||
/// <param name="sds"></param>
|
||||
public OfflineSpeakerRes(OfflineSpeakerDiarizationSegment sds)
|
||||
{
|
||||
Start = sds.Start;
|
||||
End =sds.End;
|
||||
Start = (decimal)sds.Start;
|
||||
End = (decimal)sds.End;
|
||||
SpeakerIndex = sds.Speaker;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,10 +7,6 @@ namespace VideoAnalysisCore.AICore.Whisper
|
|||
/// </summary>
|
||||
public class WhisperResDto
|
||||
{
|
||||
public WhisperResDto()
|
||||
{
|
||||
|
||||
}
|
||||
/// <summary>
|
||||
///
|
||||
/// </summary>
|
||||
|
|
@ -24,16 +20,16 @@ namespace VideoAnalysisCore.AICore.Whisper
|
|||
/// <summary>
|
||||
/// 文本
|
||||
/// </summary>
|
||||
public string Text { get; set; } = string.Empty;
|
||||
public string Text { get; } = string.Empty;
|
||||
/// <summary>
|
||||
/// 开始时间
|
||||
/// </summary>
|
||||
|
||||
public TimeSpan Start { get; set; }
|
||||
public TimeSpan Start { get; }
|
||||
/// <summary>
|
||||
/// 结束时间
|
||||
/// </summary>
|
||||
|
||||
public TimeSpan End { get; set; }
|
||||
public TimeSpan End { get; }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,12 +79,11 @@ namespace VideoAnalysisCore.Common
|
|||
/// </summary>
|
||||
public static RedisClient Redis = new RedisClient(AppCommon.Config.Redis.ConnectionString);
|
||||
/// <summary>
|
||||
/// 初始化 redis
|
||||
/// 初始化redis
|
||||
/// <para>需要在初始化配置文件时候调用</para>
|
||||
/// </summary>
|
||||
public static void Init()
|
||||
{
|
||||
Console.WriteLine("初始化 redis");
|
||||
Redis.Serialize = obj => System.Text.Json.JsonSerializer.Serialize(obj);
|
||||
Redis.Deserialize = (json, type) => System.Text.Json.JsonSerializer.Deserialize(json, type);
|
||||
InitChannel();
|
||||
|
|
@ -126,7 +125,7 @@ namespace VideoAnalysisCore.Common
|
|||
(msg) => { TouchChannel(RedisChannelEnum.SeparateAudio, msg, FFMPGEHandle.Audio2WAV16KAsync); });
|
||||
|
||||
Redis.SubscribeList(RedisExpandKey.EnumKey(RedisChannelEnum.ParsingCaptions),
|
||||
(msg) => { TouchChannel(RedisChannelEnum.ParsingCaptions, msg, SenseVoice.RunTask); });
|
||||
(msg) => { TouchChannel(RedisChannelEnum.ParsingCaptions, msg, WhisperHandle.RunTask); });
|
||||
Redis.SubscribeList(RedisExpandKey.EnumKey(RedisChannelEnum.ParsingSpeaker),
|
||||
(msg) => { TouchChannel(RedisChannelEnum.ParsingSpeaker, msg, Speaker.Run); });
|
||||
Redis.SubscribeList(RedisExpandKey.EnumKey(RedisChannelEnum.ChatModelAnalysis),
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@
|
|||
<PackageReference Include="Microsoft.Extensions.DependencyModel" Version="7.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Http" Version="8.0.0" />
|
||||
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.10.30" />
|
||||
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="1.10.28" />
|
||||
<PackageReference Include="SqlSugar.IOC" Version="2.0.0" />
|
||||
<PackageReference Include="SqlSugarCore" Version="5.1.4.170" />
|
||||
<PackageReference Include="Whisper.net" Version="1.5.0" />
|
||||
|
|
|
|||
Loading…
Reference in New Issue