Learn.VideoAnalysis/VideoAnalysisCore/AICore/SherpaOnnx/Speaker.cs

119 lines
4.1 KiB
C#

using System.Collections.Generic;
using System.Linq;
using System.Text.Json.Serialization;
using VideoAnalysisCore.Common;
using SherpaOnnx;
namespace VideoAnalysisCore.AICore.SherpaOnnx
{
/// <summary>
/// Speaker diarization model wrapper.
/// <para>Combines a pyannote segmentation model with a speaker-embedding model.</para>
/// </summary>
public class Speaker
{
    // Shared diarization engine; remains null until Init has been called.
    private static OfflineSpeakerDiarization? SD;

    /// <summary>
    /// Builds the diarization configuration and creates the shared engine used by <see cref="Run"/>.
    /// </summary>
    /// <param name="speakerNumber">Number of clusters (speakers) handed to the clustering configuration.</param>
    /// <param name="threshold">Speaker decision threshold handed to the clustering configuration.</param>
    /// <param name="useGPU">
    /// When <c>true</c> the embedding model is configured with the "cuda" provider;
    /// when <c>false</c> the provider is left at the configuration's default.
    /// </param>
    public static void Init(int speakerNumber = 2, float threshold = 0.6f, bool useGPU = false)
    {
        Console.WriteLine("初始化 Speaker");
        var config = new OfflineSpeakerDiarizationConfig();

        // Path of the pyannote segmentation model.
        config.Segmentation.Pyannote.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-pyannote-segmentation-3-0", "model.onnx");
        // Path of the speaker-embedding (verification) model.
        config.Embedding.Model = Path.Combine(AppCommon.AIModelFile, "wespeaker", "wespeaker_zh_cnceleb_resnet34_LM.onnx");
        config.Embedding.NumThreads = 4;

        // Only switch to CUDA when the caller explicitly asked for the GPU.
        // (The previous condition was inverted and enabled CUDA for useGPU == false.)
        if (useGPU)
        {
            config.Embedding.Provider = "cuda";
        }

        // Clustering settings: expected speaker count and decision threshold.
        config.Clustering.NumClusters = speakerNumber;
        config.Clustering.Threshold = threshold;

        SD = new OfflineSpeakerDiarization(config);
    }

    /// <summary>
    /// Runs speaker diarization on the task's wav file, stores the segments in the task's
    /// Redis hash under the "Speaker" field and pushes the task to the next channel.
    /// </summary>
    /// <param name="task">
    /// Task identifier; the audio file is looked up as <c>task + ".wav"</c> inside <c>task.LocalPath()</c>.
    /// </param>
    /// <exception cref="FileNotFoundException">The task's wav file does not exist.</exception>
    /// <exception cref="InvalidOperationException"><see cref="Init"/> has not been called yet.</exception>
    /// <exception cref="InvalidDataException">The wav sample rate differs from the one the engine expects.</exception>
    public static async Task Run(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), task + ".wav");
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        if (SD is null)
        {
            throw new InvalidOperationException("Speaker 未进行初始化");
        }

        var waveReader = new WaveReader(filePath);
        if (SD.SampleRate != waveReader.SampleRate)
        {
            throw new InvalidDataException($"预期采样率:{SD.SampleRate}. 传入: {waveReader.SampleRate}");
        }

        // Counts callback invocations so progress is only published on every 20th call.
        var i = 0;
        var segments = SD.ProcessWithCallback(waveReader.Samples,
            (numProcessedChunks, numTotalChunks, arg) =>
            {
                i++;
                if (i % 20 != 0)
                {
                    return 1;
                }

                // Percentage of chunks processed so far.
                var progress = (float)numProcessedChunks / numTotalChunks * 100;
                RedisExpand.SetTaskProgress(task, progress);
                return 1;
            }, nint.Zero);

        var res = segments.Select(s => new OfflineSpeakerRes(s));
        await RedisExpand.Redis.HSetAsync(RedisExpandKey.Task(task), "Speaker", res);

        // Hand the task over to the next queue.
        RedisExpand.InsertChannel(Enum.RedisChannelEnum.ChatModelAnalysis, task);
    }
}
/// <summary>
/// A single speaker diarization result: a time span attributed to one speaker.
/// </summary>
public class OfflineSpeakerRes
{
    /// <summary>
    /// Duration of the span, computed as <see cref="End"/> minus <see cref="Start"/>.
    /// Marked with <c>JsonIgnore</c>, so it is not part of the JSON output.
    /// </summary>
    [JsonIgnore]
    public float Total
    {
        get { return End - Start; }
    }

    /// <summary>
    /// Start time of the span.
    /// </summary>
    public float Start { get; set; }

    /// <summary>
    /// End time of the span.
    /// </summary>
    public float End { get; set; }

    /// <summary>
    /// Index of the speaker the span is attributed to.
    /// </summary>
    public int SpeakerIndex { get; set; }

    /// <summary>
    /// Creates an empty result with all values left at their defaults.
    /// </summary>
    public OfflineSpeakerRes()
    {
    }

    /// <summary>
    /// Creates a result from a diarization segment, rounding the start and end
    /// times to two decimal places (midpoints rounded away from zero).
    /// </summary>
    /// <param name="sds">Segment whose Start, End and Speaker values are copied.</param>
    public OfflineSpeakerRes(OfflineSpeakerDiarizationSegment sds)
    {
        // Rounds in double precision first, then narrows to float.
        static float ToTwoDecimals(double value)
        {
            return (float)Math.Round(value, 2, MidpointRounding.AwayFromZero);
        }

        SpeakerIndex = sds.Speaker;
        Start = ToTwoDecimals(sds.Start);
        End = ToTwoDecimals(sds.End);
    }
}
}