123 lines
4.7 KiB
C#
123 lines
4.7 KiB
C#
using VideoAnalysisCore.Common;
|
||
using System;
|
||
using System.Linq;
|
||
using Whisper.net;
|
||
using Whisper.net.Ggml;
|
||
using Whisper.net.Wave;
|
||
|
||
namespace VideoAnalysisCore.AICore.Whisper
|
||
{
|
||
/// <summary>
/// Speech-recognition helpers built on Whisper.net.
/// </summary>
public static class WhisperHandle
{
    /// <summary>
    /// Transcribes the WAV audio of a task into caption segments and stores them.
    /// </summary>
    /// <remarks>
    /// The audio file is looked up at <c>Path.Combine(task.LocalPath(), task + ".wav")</c>.
    /// Each segment produced by the processor is wrapped in a <c>WhisperResDto</c>; the
    /// resulting list is written to the "Captions" field of the task's Redis hash, and the
    /// task is then inserted into the <c>ParsingSpeaker</c> channel.
    /// </remarks>
    /// <param name="task">Task identifier; also used as the base name of the WAV file.</param>
    /// <returns>A task that completes after the captions are stored and the task is queued.</returns>
    /// <exception cref="ArgumentException"><paramref name="task"/> is null or empty.</exception>
    /// <exception cref="FileNotFoundException">The task's WAV file does not exist.</exception>
    public static async Task RunTask(string task)
    {
        // Validate the identifier itself: the combined path below can never be null or
        // empty, so checking the path would not catch a missing task name.
        if (string.IsNullOrEmpty(task))
        {
            throw new ArgumentException("task 不能为空", nameof(task));
        }

        var filePath = Path.Combine(task.LocalPath(), task + ".wav");
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        var opt = new WhisperOption(filePath) { ModelName = AppCommon.Config.Whisper.ModelName };

        // Create a WhisperFactory from the configured model file.
        using var factory = WhisperFactory.FromPath(opt.ModPath);
        var builder = factory.CreateBuilder()
            // Custom prompt passed to the model.
            .WithPrompt("以下是普通话的句子")
            // Language of the audio.
            .WithLanguage(opt.Language);

        // NOTE: translation (builder.WithTranslate() when opt.Command == "translate")
        // is currently not enabled.
        using var processor = builder.Build();

        // Open the task's audio file for reading.
        using var fileStream = File.OpenRead(filePath);
        var res = new List<WhisperResDto>(200);

        // Collect every segment the processor yields for the audio stream.
        await foreach (var segment in processor.ProcessAsync(fileStream, CancellationToken.None))
        {
            res.Add(new WhisperResDto(segment));
        }

        RedisExpand.Redis.HMSet(RedisExpandKey.Task(task), "Captions", res);
        RedisExpand.InsertChannel(Enum.RedisChannelEnum.ParsingSpeaker, task);
    }

    /// <summary>
    /// Detects the language of the audio file referenced by <paramref name="opt"/>
    /// and writes the result to the console.
    /// </summary>
    /// <param name="opt">Options providing the model file and the audio file path.</param>
    static void LanguageIdentification(WhisperOption opt)
    {
        // Read the whole model file into memory.
        var bufferedModel = File.ReadAllBytes(opt.ModPath);

        // Multiple tasks can use the same factory to create processors.
        using var factory = WhisperFactory.FromBuffer(bufferedModel);

        // Create a builder from the factory and set its language.
        var builder = factory.CreateBuilder()
            .WithLanguage(opt.Language);

        using var processor = builder.Build();

        // Open the audio file given by opt.FilePath.
        using var fileStream = File.OpenRead(opt.FilePath);

        // Parse the audio with WaveParser and take its averaged samples.
        var wave = new WaveParser(fileStream);
        var samples = wave.GetAvgSamples();

        // Ask the processor to detect the language of the samples.
        var language = processor.DetectLanguage(samples, speedUp: true);

        Console.WriteLine("Language is " + language);
    }
}
|
||
/// <summary>
/// Options for one Whisper audio-processing run.
/// </summary>
public class WhisperOption
{
    /// <summary>
    /// Creates options for the given target audio file.
    /// </summary>
    /// <param name="file">Path of the target audio file; stored in <see cref="FilePath"/>.</param>
    public WhisperOption(string file) => FilePath = file;

    /// <summary>
    /// Command type; defaults to "transcribe".
    /// </summary>
    public string Command { get; set; } = "transcribe";

    /// <summary>
    /// Path of the audio file to process.
    /// The original note says the file is expected under the bin directory by default.
    /// </summary>
    public string FilePath { get; set; }

    /// <summary>
    /// Language handed to the Whisper builder; defaults to "chinese".
    /// </summary>
    public string Language { get; set; } = "chinese";

    /// <summary>
    /// File name of the model; defaults to "ggml-base.bin".
    /// </summary>
    public string ModelName { get; set; } = "ggml-base.bin";

    /// <summary>
    /// Full path of the model file: <see cref="ModelName"/> combined with
    /// <c>AppCommon.AIModelFile</c>.
    /// </summary>
    public string ModPath => Path.Combine(AppCommon.AIModelFile, ModelName);

    // Model type (property currently disabled):
    //public GgmlType ModelType { get; set; } = GgmlType.Base;
}
|
||
}
|