119 lines
4.1 KiB
C#
119 lines
4.1 KiB
C#
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text.Json.Serialization;
|
|
using VideoAnalysisCore.Common;
|
|
using SherpaOnnx;
|
|
|
|
namespace VideoAnalysisCore.AICore.SherpaOnnx
|
|
{
|
|
|
|
/// <summary>
/// Speaker diarization model wrapper.
/// <para>pyannote</para>
/// </summary>
public class Speaker
{
    // Shared diarization engine; remains null until Init has been called.
    private static OfflineSpeakerDiarization? SD;

    /// <summary>
    /// Initializes the shared speaker diarization engine.
    /// </summary>
    /// <param name="speakerNumber">Number of speakers; assigned to the clustering cluster count.</param>
    /// <param name="threshold">Speaker decision threshold; assigned to the clustering threshold.</param>
    /// <param name="useGPU">
    /// When <c>true</c>, the embedding model provider is set to "cuda";
    /// when <c>false</c>, the provider is left at the configuration's default.
    /// </param>
    public static void Init(int speakerNumber = 2, float threshold = 0.6f, bool useGPU = false)
    {
        Console.WriteLine("初始化 Speaker");
        var config = new OfflineSpeakerDiarizationConfig();

        // Path of the Pyannote segmentation model.
        config.Segmentation.Pyannote.Model = Path.Combine(AppCommon.AIModelFile, "sherpa-onnx-pyannote-segmentation-3-0", "model.onnx");

        // Path of the speaker embedding (verification) model.
        config.Embedding.Model = Path.Combine(AppCommon.AIModelFile, "wespeaker", "wespeaker_zh_cnceleb_resnet34_LM.onnx");
        config.Embedding.NumThreads = 4;

        // Only switch the embedding model to CUDA when the caller asked for the GPU.
        // (The previous condition was inverted and selected CUDA when useGPU was false.)
        if (useGPU)
        {
            config.Embedding.Provider = "cuda";
        }

        // Number of speakers.
        config.Clustering.NumClusters = speakerNumber;

        // Speaker decision threshold.
        config.Clustering.Threshold = threshold;

        SD = new OfflineSpeakerDiarization(config);
    }

    /// <summary>
    /// Runs speaker diarization on the task's audio file, stores the segments in the
    /// task's Redis hash under the "Speaker" field, and enqueues the task on the
    /// ChatModelAnalysis channel.
    /// </summary>
    /// <param name="task">
    /// Task identifier. The audio file is expected at <c>task.LocalPath()</c> with the
    /// file name <c>task + ".wav"</c>.
    /// </param>
    /// <exception cref="FileNotFoundException">The task's audio file does not exist.</exception>
    /// <exception cref="InvalidOperationException">
    /// <see cref="Init"/> has not been called, or the audio sample rate differs from the
    /// sample rate expected by the diarization engine.
    /// </exception>
    public static async Task Run(string task)
    {
        var filePath = Path.Combine(task.LocalPath(), task + ".wav");

        // File.Exists already returns false for null or empty paths, so no separate check is needed.
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        // Capture the engine once so the same instance is used for the whole run.
        var diarizer = SD ?? throw new InvalidOperationException("Speaker 未进行初始化");

        var waveReader = new WaveReader(filePath);
        if (diarizer.SampleRate != waveReader.SampleRate)
        {
            throw new InvalidOperationException($"预期采样率:{diarizer.SampleRate}. 传入: {waveReader.SampleRate}");
        }

        // Counts callback invocations so progress is only pushed on every 20th one.
        var callbackCount = 0;
        var segments = diarizer.ProcessWithCallback(
            waveReader.Samples,
            (numProcessedChunks, numTotalChunks, arg) =>
            {
                callbackCount++;
                if (callbackCount % 20 == 0)
                {
                    var progress = (float)numProcessedChunks / numTotalChunks * 100;
                    RedisExpand.SetTaskProgress(task, progress);
                }

                // NOTE(review): the callback always returns 1, as before — presumably "continue"; confirm against the sherpa-onnx API.
                return 1;
            },
            nint.Zero);

        // Materialize the results before storing them instead of handing a lazy LINQ query to Redis.
        var res = segments.Select(s => new OfflineSpeakerRes(s)).ToList();
        await RedisExpand.Redis.HSetAsync(RedisExpandKey.Task(task), "Speaker", res);

        // Hand the task over to the next queue.
        RedisExpand.InsertChannel(Enum.RedisChannelEnum.ChatModelAnalysis, task);
    }
}
|
|
/// <summary>
/// A single speaker diarization result segment.
/// </summary>
public class OfflineSpeakerRes
{
    /// <summary>
    /// Total duration of the segment, computed as <see cref="End"/> minus <see cref="Start"/>.
    /// Marked with <c>JsonIgnore</c>.
    /// </summary>
    [JsonIgnore]
    public float Total
    {
        get { return End - Start; }
    }

    /// <summary>
    /// Start time of the segment.
    /// </summary>
    public float Start { get; set; }

    /// <summary>
    /// End time of the segment.
    /// </summary>
    public float End { get; set; }

    /// <summary>
    /// Index of the speaker this segment is attributed to.
    /// </summary>
    public int SpeakerIndex { get; set; }

    /// <summary>
    /// Creates an empty result; all properties keep their default values.
    /// </summary>
    public OfflineSpeakerRes()
    {
    }

    /// <summary>
    /// Creates a result from a diarization segment, rounding the start and end times
    /// to two decimal places (midpoints rounded away from zero).
    /// </summary>
    /// <param name="sds">The diarization segment to copy the values from.</param>
    public OfflineSpeakerRes(OfflineSpeakerDiarizationSegment sds)
    {
        var roundedStart = Math.Round(sds.Start, 2, MidpointRounding.AwayFromZero);
        var roundedEnd = Math.Round(sds.End, 2, MidpointRounding.AwayFromZero);

        SpeakerIndex = sds.Speaker;
        Start = (float)roundedStart;
        End = (float)roundedEnd;
    }
}
|
|
}
|
|
|