using System.Collections.Generic;
using System.Linq;
using System.Text.Json.Serialization;
using VideoAnalysisCore.Common;
using SherpaOnnx;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
/// <summary>
/// Speaker diarization model wrapper (pyannote segmentation + speaker embedding).
/// </summary>
public class Speaker
{
    // Shared diarization engine; stays null until Init has been called.
    private static OfflineSpeakerDiarization? SD;

    /// <summary>
    /// Builds the shared diarization engine from the pyannote segmentation model and the
    /// wespeaker embedding model located under <c>AppCommon.AIModelFile</c>.
    /// </summary>
    /// <param name="speakerNumber">Value assigned to the clustering config's <c>NumClusters</c> (number of speakers).</param>
    /// <param name="threshold">Value assigned to the clustering config's <c>Threshold</c> (speaker decision threshold).</param>
    /// <param name="useGPU">
    /// When <c>true</c>, the embedding extractor is configured with the "cuda" provider;
    /// when <c>false</c>, the provider is not set and the library default applies.
    /// </param>
    public static void Init(int speakerNumber = 2, float threshold = 0.6f, bool useGPU = false)
    {
        Console.WriteLine("初始化 Speaker");

        var config = new OfflineSpeakerDiarizationConfig();

        // Pyannote segmentation model path.
        config.Segmentation.Pyannote.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-pyannote-segmentation-3-0", "model.onnx");

        // Speaker embedding (verification) model path.
        config.Embedding.Model = Path.Combine(AppCommon.AIModelFile, "wespeaker", "wespeaker_zh_cnceleb_resnet34_LM.onnx");
        config.Embedding.NumThreads = 4;

        // Fix: the previous condition was inverted (!useGPU), which selected CUDA exactly
        // when the caller had NOT requested GPU execution (including the default call).
        if (useGPU)
        {
            config.Embedding.Provider = "cuda";
        }
#if DEBUG
        config.Embedding.Debug = 1;
#endif

        // Number of speakers and the speaker decision threshold.
        config.Clustering.NumClusters = speakerNumber;
        config.Clustering.Threshold = threshold;

        SD = new OfflineSpeakerDiarization(config);
    }

    /// <summary>
    /// Runs speaker diarization on the task's WAV file, stores the resulting segments in the
    /// task's Redis hash under the "Speaker" field, then pushes the task onto the
    /// ChatModelAnalysis channel.
    /// </summary>
    /// <param name="task">Task identifier; the audio file is looked up as <c>{task}.wav</c> inside <c>task.LocalPath()</c>.</param>
    /// <exception cref="FileNotFoundException">The task's WAV file does not exist.</exception>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called yet.</exception>
    /// <exception cref="InvalidDataException">The WAV sample rate differs from the engine's sample rate.</exception>
    public static async Task Run(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), task + ".wav");

        // File.Exists already returns false for null or empty paths, so no separate check is needed.
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        // Read the static field once so the null check and the later use see the same instance.
        var diarizer = SD;
        if (diarizer is null)
        {
            throw new InvalidOperationException("Speaker 未进行初始化");
        }

        var waveReader = new WaveReader(filePath);
        if (diarizer.SampleRate != waveReader.SampleRate)
        {
            throw new InvalidDataException($"预期采样率:{diarizer.SampleRate}. 传入: {waveReader.SampleRate}");
        }

        var callbackCount = 0;
        var segments = diarizer.ProcessWithCallback(waveReader.Samples,
            (numProcessedChunks, numTotalChunks, arg) =>
            {
                callbackCount++;

                // Throttle Redis writes to every 20th callback, but always publish the last
                // chunk so the reported progress does not stall below 100%.
                var isLastChunk = numProcessedChunks >= numTotalChunks;
                if (callbackCount % 20 != 0 && !isLastChunk)
                {
                    return 1;
                }

                var progress = (float)numProcessedChunks / numTotalChunks * 100;
                RedisExpand.SetTaskProgress(task, progress);
                return 1;
            }, nint.Zero);

        var res = segments.Select(s => new OfflineSpeakerRes(s));
        await RedisExpand.Redis.HSetAsync(RedisExpandKey.Task(task), "Speaker", res);

        // Hand the task over to the next processing queue.
        RedisExpand.InsertChannel(Enum.RedisChannelEnum.ChatModelAnalysis, task);
    }
}
/// <summary>
/// A single speaker diarization result segment.
/// </summary>
public class OfflineSpeakerRes
{
    /// <summary>
    /// Duration of the segment, computed as <see cref="End"/> minus <see cref="Start"/>.
    /// Marked <c>[JsonIgnore]</c>, so System.Text.Json leaves it out of the serialized output.
    /// </summary>
    [JsonIgnore]
    public float Total
    {
        get { return End - Start; }
    }

    /// <summary>
    /// Start time of the segment (presumably in seconds; confirm against the sherpa-onnx segment type).
    /// </summary>
    public float Start { get; set; }

    /// <summary>
    /// End time of the segment (same unit as <see cref="Start"/>).
    /// </summary>
    public float End { get; set; }

    /// <summary>
    /// Index of the speaker this segment is attributed to.
    /// </summary>
    public int SpeakerIndex { get; set; }

    /// <summary>
    /// Creates an empty result; all properties keep their default values.
    /// </summary>
    public OfflineSpeakerRes()
    {
    }

    /// <summary>
    /// Creates a result from a diarization segment, rounding the start and end times
    /// to two decimal places (midpoints rounded away from zero).
    /// </summary>
    /// <param name="sds">Segment whose Start, End and Speaker values are copied.</param>
    public OfflineSpeakerRes(OfflineSpeakerDiarizationSegment sds)
    {
        SpeakerIndex = sds.Speaker;
        Start = RoundToTwoDecimals(sds.Start);
        End = RoundToTwoDecimals(sds.End);
    }

    // Rounds to two decimal places, midpoints away from zero, then narrows to float.
    private static float RoundToTwoDecimals(double value)
    {
        double rounded = Math.Round(value, 2, MidpointRounding.AwayFromZero);
        return (float)rounded;
    }
}
}