Learn.VideoAnalysis/VideoAnalysisCore/AICore/Whisper/WhisperHandle.cs

123 lines
4.7 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using VideoAnalysisCore.Common;
using System;
using System.Linq;
using Whisper.net;
using Whisper.net.Ggml;
using Whisper.net.Wave;
namespace VideoAnalysisCore.AICore.Whisper
{
/// <summary>
/// Speech-to-text helpers built on Whisper.net.
/// </summary>
public static class WhisperHandle
{
    /// <summary>
    /// Transcribes the WAV file belonging to a task and stores the resulting captions.
    /// </summary>
    /// <remarks>
    /// The audio file is looked up at <c>Path.Combine(task.LocalPath(), task + ".wav")</c>.
    /// Every segment returned by the Whisper processor is wrapped in a <c>WhisperResDto</c>;
    /// the collected list is written with <c>HMSet</c> under the <c>"Captions"</c> field of
    /// <c>RedisExpandKey.Task(task)</c>, and the task is then handed to
    /// <c>RedisExpand.InsertChannel</c> for the <c>ParsingSpeaker</c> channel.
    /// </remarks>
    /// <param name="task">Task identifier; also used as the base name of the WAV file.</param>
    /// <returns>A task that completes after the captions are stored and the channel insert is issued.</returns>
    /// <exception cref="ArgumentException"><paramref name="task"/> is null or empty.</exception>
    /// <exception cref="FileNotFoundException">The WAV file for the task does not exist.</exception>
    public static async Task RunTask(string task)
    {
        // Validate the input that can actually be empty; the combined path below never is.
        if (string.IsNullOrEmpty(task))
        {
            throw new ArgumentException("task must not be null or empty", nameof(task));
        }

        var filePath = Path.Combine(task.LocalPath(), task + ".wav");
        if (!File.Exists(filePath))
        {
            // FileNotFoundException derives from Exception, so existing catch blocks keep working.
            throw new FileNotFoundException("task 音频路径未找到", filePath);
        }

        var opt = new WhisperOption(filePath) { ModelName = AppCommon.Config.Whisper.ModelName };

        // Create the factory from the model file resolved by the options object.
        using var factory = WhisperFactory.FromPath(opt.ModPath);

        // Translation is intentionally not enabled here (no WithTranslate() call);
        // opt.Command is currently not consulted.
        using var processor = factory.CreateBuilder()
            // Initial prompt given to the model.
            .WithPrompt("以下是普通话的句子")
            // Language of the audio.
            .WithLanguage(opt.Language)
            .Build();

        // Stream the audio file into the processor.
        using var fileStream = File.OpenRead(filePath);

        var res = new List<WhisperResDto>(200);

        // Collect every segment the processor yields.
        await foreach (var segment in processor.ProcessAsync(fileStream, CancellationToken.None))
        {
            res.Add(new WhisperResDto(segment));
        }

        // Persist the captions, then hand the task to the next stage.
        RedisExpand.Redis.HMSet(RedisExpandKey.Task(task), "Captions", res);
        RedisExpand.InsertChannel(Enum.RedisChannelEnum.ParsingSpeaker, task);
    }

    /// <summary>
    /// Detects the language of the audio file named by <paramref name="opt"/> and writes it to the console.
    /// </summary>
    /// <param name="opt">Options supplying the model file, the language setting and the audio file path.</param>
    private static void LanguageIdentification(WhisperOption opt)
    {
        // Load the whole model file into memory and build the factory from that buffer.
        var bufferedModel = File.ReadAllBytes(opt.ModPath);

        // A single factory can create processors for several tasks.
        using var factory = WhisperFactory.FromBuffer(bufferedModel);

        using var processor = factory.CreateBuilder()
            .WithLanguage(opt.Language)
            .Build();

        // Open the audio file referenced by opt.FilePath.
        using var fileStream = File.OpenRead(opt.FilePath);

        // Parse the wave file and obtain its averaged samples.
        var wave = new WaveParser(fileStream);
        var samples = wave.GetAvgSamples();

        // Ask the processor which language the samples are in.
        var language = processor.DetectLanguage(samples, speedUp: true);
        Console.WriteLine("Language is " + language);
    }
}
/// <summary>
/// Options describing one Whisper audio-processing run.
/// </summary>
public class WhisperOption
{
    /// <summary>
    /// Creates options for the given audio file.
    /// </summary>
    /// <param name="file">Path of the target audio file; stored in <see cref="FilePath"/>.</param>
    public WhisperOption(string file) => FilePath = file;

    /// <summary>
    /// Command type. Defaults to <c>"transcribe"</c>.
    /// </summary>
    public string Command { get; set; } = "transcribe";

    /// <summary>
    /// Path of the audio file to process.
    /// The original note says audio files are expected under the bin directory by default;
    /// nothing in this class enforces that.
    /// </summary>
    public string FilePath { get; set; }

    /// <summary>
    /// Language setting. Defaults to <c>"chinese"</c>.
    /// </summary>
    public string Language { get; set; } = "chinese";

    /// <summary>
    /// File name of the model. Defaults to <c>"ggml-base.bin"</c>.
    /// </summary>
    public string ModelName { get; set; } = "ggml-base.bin";

    /// <summary>
    /// Path of the model file: <c>AppCommon.AIModelFile</c> combined with <see cref="ModelName"/>.
    /// </summary>
    public string ModPath
    {
        get { return Path.Combine(AppCommon.AIModelFile, ModelName); }
    }

    // Model type (currently disabled).
    //public GgmlType ModelType { get; set; } = GgmlType.Base;
}
}