using System.Collections.Generic;
using System.Linq;
using System.Text.Json.Serialization;
using VideoAnalysisCore.Common;
using SherpaOnnx;
using SqlSugar.IOC;
using VideoAnalysisCore.Model;
using System.Text.Json;
using VideoAnalysisCore.Model.Enum;
using Microsoft.Extensions.DependencyInjection;
using UserCenter.Model.Enum;

namespace VideoAnalysisCore.AICore.SherpaOnnx
{
    /// <summary>
    /// Speaker diarization model wrapper (pyannote segmentation model plus a
    /// speaker-embedding model, both loaded through sherpa-onnx).
    /// </summary>
    public static class Speaker
    {
        // Shared diarization engine; stays null until AddSpeakerAI has been called.
        private static OfflineSpeakerDiarization? SD;

        /// <summary>
        /// Initializes the shared speaker diarization engine.
        /// </summary>
        /// <param name="service">Service collection this extension method hangs off; it is not modified here.</param>
        /// <param name="speakerNumber">Number of speakers, written to <c>Clustering.NumClusters</c>.</param>
        /// <param name="threshold">Speaker decision threshold, written to <c>Clustering.Threshold</c>.</param>
        /// <param name="useGPU">When <c>true</c>, the embedding model is configured with the "cuda" provider.</param>
        public static void AddSpeakerAI(this IServiceCollection service, int speakerNumber = 2, float threshold = 0.6f, bool useGPU = false)
        {
            Console.WriteLine($"{DateTime.Now}=>初始化 Speaker");

            var config = new OfflineSpeakerDiarizationConfig();

            // Path of the pyannote segmentation model.
            config.Segmentation.Pyannote.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-pyannote-segmentation-3-0", "model.onnx");

            // Path of the speaker embedding (verification) model.
            config.Embedding.Model = Path.Combine(AppCommon.AIModelFile, "wespeaker", "wespeaker_zh_cnceleb_resnet34_LM.onnx");

            // Number of speakers.
            config.Clustering.NumClusters = speakerNumber;
            config.Embedding.NumThreads = 4;

            // Switch to the CUDA provider only when the caller asked for GPU.
            // (The previous condition was inverted and enabled CUDA when useGPU was false.)
            if (useGPU)
            {
                config.Embedding.Provider = "cuda";
            }

#if DEBUG
            config.Embedding.Debug = 1;
#endif

            // Speaker decision threshold.
            config.Clustering.Threshold = threshold;

            SD = new OfflineSpeakerDiarization(config);
        }

        /// <summary>
        /// Runs speaker diarization for a task's WAV file, stores the result in
        /// Redis and in the database, then pushes the task onto the
        /// <c>ChatModelAnalysis</c> channel.
        /// </summary>
        /// <param name="task">
        /// Task identifier. It names the WAV file (<c>{task}.wav</c> under <c>task.LocalPath()</c>)
        /// and is parsed as a <see cref="long"/> to select the database row.
        /// </param>
        /// <exception cref="FileNotFoundException">The task's WAV file does not exist.</exception>
        /// <exception cref="InvalidOperationException">
        /// The engine has not been initialized, or the WAV sample rate differs from the engine's.
        /// </exception>
        /// <exception cref="FormatException"><paramref name="task"/> is not a valid integer.</exception>
        public static async Task Run(string task)
        {
            var filePath = Path.Combine(task.LocalPath(), task + ".wav");
            if (!File.Exists(filePath))
            {
                throw new FileNotFoundException("task 音频路径未找到", filePath);
            }

            // Copy to a local so the rest of the method works with a non-null reference.
            var diarizer = SD ?? throw new InvalidOperationException("Speaker 未进行初始化");

            // NOTE(review): relies on WaveReader implementing IDisposable in the
            // sherpa-onnx C# bindings — confirm against the package version in use.
            using var waveReader = new WaveReader(filePath);
            if (diarizer.SampleRate != waveReader.SampleRate)
            {
                throw new InvalidOperationException($"预期采样率:{diarizer.SampleRate}. \n 传入: {waveReader.SampleRate}");
            }

            var callbackCount = 0;
            var segments = diarizer.ProcessWithCallback(waveReader.Samples, (numProcessedChunks, numTotalChunks, arg) =>
            {
                // Only publish progress on every 20th callback to limit Redis writes.
                callbackCount++;
                if (callbackCount % 20 == 0)
                {
                    var progress = (float)numProcessedChunks / numTotalChunks * 100;
                    RedisExpand.SetTaskProgress(task, progress);
                }

                // Presumably tells the native side to keep processing — TODO confirm.
                return 1;
            }, nint.Zero);

            // Materialize once: the result is consumed by both the Redis write and
            // the JSON serialization below.
            var res = segments.Select(s => new OfflineSpeakerRes(s)).ToList();

            await RedisExpand.Redis.HSetAsync(RedisExpandKey.Task(task), "Speaker", res);

            var speakerStr = JsonSerializer.Serialize(res);

            // Parse outside the expression tree so the ORM only sees a plain value.
            var taskId = long.Parse(task);

            // NOTE(review): the entity type argument of Updateable is not visible in
            // this source; confirm it targets the task table.
            // The chain must be executed explicitly; without ExecuteCommand* the
            // update is only built and never sent to the database.
            await DbScoped.Sugar
                .Updateable()
                .SetColumns(it => it.Speaker == speakerStr)
                .Where(it => it.Id == taskId)
                .ExecuteCommandAsync();

            // Hand the task over to the next queue.
            RedisExpand.InsertChannel(RedisChannelEnum.ChatModelAnalysis, task);
        }
    }

    /// <summary>
    /// One speaker diarization segment.
    /// </summary>
    public class OfflineSpeakerRes
    {
        /// <summary>
        /// Segment duration (<see cref="End"/> minus <see cref="Start"/>); excluded from JSON output.
        /// </summary>
        [JsonIgnore]
        public float Total => End - Start;

        /// <summary>
        /// Segment start time.
        /// </summary>
        public float Start { get; set; }

        /// <summary>
        /// Segment end time.
        /// </summary>
        public float End { get; set; }

        /// <summary>
        /// Index of the speaker assigned to this segment.
        /// </summary>
        public int SpeakerIndex { get; set; }

        /// <summary>
        /// Creates an empty result.
        /// </summary>
        public OfflineSpeakerRes()
        {
        }

        /// <summary>
        /// Creates a result from a diarization segment, rounding the start and end
        /// times to two decimal places (midpoints rounded away from zero).
        /// </summary>
        /// <param name="sds">Segment produced by the diarization engine.</param>
        public OfflineSpeakerRes(OfflineSpeakerDiarizationSegment sds)
        {
            Start = (float)Math.Round(sds.Start, 2, MidpointRounding.AwayFromZero);
            End = (float)Math.Round(sds.End, 2, MidpointRounding.AwayFromZero);
            SpeakerIndex = sds.Speaker;
        }
    }
}