优化 gpt说话人流程 计算token数量流程

This commit is contained in:
小肥羊 2024-11-01 18:16:21 +08:00
parent 3dfe2fc087
commit 263041aa6a
8 changed files with 123 additions and 21 deletions

View File

@ -8,12 +8,15 @@ using VideoAnalysisCore.AICore.ChatGPT.KIMI;
using VideoAnalysisCore.AICore.SherpaOnnx; using VideoAnalysisCore.AICore.SherpaOnnx;
using SqlSugar; using SqlSugar;
namespace Learn.VideoAnalysis namespace Learn.VideoAnalysis
{ {
public class Program public class Program
{ {
public static void Main(string[] args) public static void Main(string[] args)
{ {
var builder = WebApplication.CreateBuilder(args); var builder = WebApplication.CreateBuilder(args);
// Add services to the container. // Add services to the container.

View File

@ -0,0 +1,40 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using VideoAnalysisCore.Enum;
using VideoAnalysisCore.Model.Dto;
namespace VideoAnalysisCore.AICore.ChatGPT.Dto
{
/// <summary>
/// Overall AI assessment exposed through <see cref="CallGPTRes.Assessment"/>.
/// Declares no members yet.
/// </summary>
public class Assessment
{
}
/// <summary>
/// Data object holding speaking-time totals, the video timeline,
/// the GPT model id and the overall AI assessment.
/// </summary>
public class CallGPTRes
{
    /// <summary>
    /// Teacher speaking time.
    /// <para>Seconds.</para>
    /// </summary>
    public decimal TeacherSpeaking { get; set; } = 0;

    /// <summary>
    /// Student speaking time.
    /// <para>Seconds.</para>
    /// </summary>
    public decimal StudentSpeaking { get; set; } = 0;

    /// <summary>
    /// Video timeline; <c>null</c> until assigned.
    /// </summary>
    public IEnumerable<TimeBase>? TimeBase { get; set; }

    /// <summary>
    /// GPT model id; empty string by default.
    /// </summary>
    public string GPTModel { get; set; } = string.Empty;

    /// <summary>
    /// Overall AI assessment.
    /// Nothing initialises this property, so it is <c>null</c> on a fresh instance;
    /// the nullable annotation states that explicitly instead of leaving a
    /// non-nullable property uninitialised (CS8618).
    /// </summary>
    public Assessment? Assessment { get; set; }
}
}

View File

@ -9,6 +9,9 @@ using System.Text;
using FFmpeg.NET.Services; using FFmpeg.NET.Services;
using Microsoft.Extensions.Primitives; using Microsoft.Extensions.Primitives;
using VideoAnalysisCore.AICore.ChatGPT.Dto; using VideoAnalysisCore.AICore.ChatGPT.Dto;
using System.ComponentModel.DataAnnotations;
using VideoAnalysisCore.Enum;
using System.Reflection;
namespace VideoAnalysisCore.AICore.ChatGPT.KIMI namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
{ {
@ -50,15 +53,21 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
criteriaBuilder.Append(item.NamePrompt); criteriaBuilder.Append(item.NamePrompt);
criteriaBuilder.Append("返回bool值|"); criteriaBuilder.Append("返回bool值|");
} }
criteriaBuilder.Append("100:分析授课中使用的高频词10个|"); //拼接枚举提问
criteriaBuilder.Append("101:分析授课中引发学生思考问题的时间段数组|"); foreach (var value in System.Enum.GetValues(typeof(QuestionTypeEnum)))
criteriaBuilder.Append("102:分析授课中让学生产生讨论欲望的时间段|"); {
criteriaBuilder.Append("104:分析授课中巧妙地引入学习主题的时间段|"); var enumValue = (QuestionTypeEnum)value;
criteriaBuilder.Append("105:分析授课中教学语言简明清晰准确生动的时间段|"); var displayAttribute = enumValue.GetType()
criteriaBuilder.Append("106:分析授课中教师激发学生思考的时间段|"); .GetField(enumValue.ToString())?
criteriaBuilder.Append("107:统计授课中教师提问类型的次数 类型重复回答,老师追问,简单性表演,老师补充答案,表扬并补充答案|"); .GetCustomAttribute<DisplayAttribute>();
if (displayAttribute == null) continue;
criteriaBuilder.Append(enumValue.GetHashCode());
criteriaBuilder.Append(":");
criteriaBuilder.Append(displayAttribute.Prompt);
criteriaBuilder.Append("|");
}
var resFormat = "问题编号:int,结果:array,问题解释:string"; var resFormat = "问题编号:int,结果:array|bool,问题解释:string";
var postMessages = var postMessages =
$"以下是一段音频的字幕,分析这段字幕(格式 说话人:开始秒:结束秒:内容|下一段字幕)." + $"以下是一段音频的字幕,分析这段字幕(格式 说话人:开始秒:结束秒:内容|下一段字幕)." +
$"来简明的回答提出的问题 问题列表 {criteriaBuilder} " + $"来简明的回答提出的问题 问题列表 {criteriaBuilder} " +
@ -67,24 +76,32 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
var modelsResp = await moonshotClient.ListModels(); var modelsResp = await moonshotClient.ListModels();
if (modelsResp is null || modelsResp.data.Count == 0) if (modelsResp is null || modelsResp.data.Count == 0)
throw new Exception("未获取KIMI模型类型"); throw new Exception("未获取KIMI模型类型");
var reqTokenCount =await moonshotClient.GetAsTiMateTokenCount(postMessages);
var modelId = reqTokenCount > 32 * 1000 ? "moonshot-v1-128k" : "moonshot-v1-32k";
var chatRep = new ChatReq var chatRep = new ChatReq
{ {
max_tokens = postMessages.Length * 2, max_tokens =1000 * 31,
temperature = 0.3, temperature = 0.3,
frequency_penalty = 0, frequency_penalty = 0,
presence_penalty = 0, presence_penalty = 0,
model = modelsResp.data.First(s=>s.id.Contains("v1-32k")).id, model = modelId,
messages = new List<MessagesItem>(){ messages = new List<MessagesItem>(){
new MessagesItem(postMessages,"system"), new MessagesItem(postMessages,"system"),
//todo 规定返回json格式
//new MessagesItem(postMessages,"assistant"),
} }
}; };
var chatResp = await moonshotClient.Chat(chatRep); var chatResp = await moonshotClient.Chat(chatRep);
var chatResContent = chatResp?.choices.FirstOrDefault()?.message.content; if(chatResp is null || chatResp.error != null)
if(chatResp is null || string.IsNullOrEmpty(chatResContent))
throw new Exception($"KIMI模型返回异常 Chat 请求参数: {JsonSerializer.Serialize(chatRep)} " + throw new Exception($"KIMI模型返回异常 Chat 请求参数: {JsonSerializer.Serialize(chatRep)} " +
$" chatResp {chatResp} chatResContent {chatResContent}"); $" chatResp {JsonSerializer.Serialize(chatResp)}");
var chatResContent = chatResp?.choices.FirstOrDefault()?.message.content;
var questionRes = JsonSerializer.Deserialize<QuestionRes[]>(chatResContent); var questionRes = JsonSerializer.Deserialize<QuestionRes[]>(chatResContent);
//分析gpt返回结果 //todo 分析gpt返回结果
//todo 分析上课时间段情况 分析 独立学习 小组合作 随堂练习等情况
} }
} }
} }

View File

@ -56,12 +56,16 @@ namespace VideoAnalysisCore.AICore.ChatGPT.KIMI
{ {
var requestBody = Newtonsoft.Json.JsonConvert.SerializeObject(chatReq); var requestBody = Newtonsoft.Json.JsonConvert.SerializeObject(chatReq);
var chatResp = await PostJsonStreamAsync("/v1/chat/completions", requestBody); var chatResp = await PostJsonStreamAsync("/v1/chat/completions", requestBody);
var resStr = chatResp.Content.ReadAsStringAsync();
return await chatResp.Content.ReadFromJsonAsync<ChatRes>(); return await chatResp.Content.ReadFromJsonAsync<ChatRes>();
} }
/// <summary> /// <summary>
/// Get as timate token count /// 计算token长度
/// </summary> /// </summary>
/// <param name="chatReqText">文本</param>
/// <returns></returns>
/// <exception cref="Exception"></exception>
public async Task<int?> GetAsTiMateTokenCount(string chatReqText) public async Task<int?> GetAsTiMateTokenCount(string chatReqText)
{ {
var response = await PostJsonAsync("/v1/tokenizers/estimate-token-count", chatReqText); var response = await PostJsonAsync("/v1/tokenizers/estimate-token-count", chatReqText);

View File

@ -16,6 +16,7 @@
/// ///
/// </summary> /// </summary>
public string role { get; set; } public string role { get; set; }
public bool partial { get; set; } = false;
/// <summary> /// <summary>
/// ///
@ -134,8 +135,21 @@
} }
/// <summary>
/// Error details carried on a chat response (see <c>ChatRes.error</c>).
/// </summary>
public class ChatResError
{
    /// <summary>
    /// Error message text; defaults to an empty string.
    /// </summary>
    public string message { get; set; } = "";

    /// <summary>
    /// Error type; defaults to an empty string.
    /// </summary>
    public string type { get; set; } = "";
}
public class ChatRes public class ChatRes
{ {
public ChatResError? error { get; set; }
public string id { get; set; } public string id { get; set; }
public int created { get; set; } public int created { get; set; }
/// <summary> /// <summary>

View File

@ -164,10 +164,11 @@ namespace VideoAnalysisCore.Common
else else
studentSpeaking += speakerRes.Total; studentSpeaking += speakerRes.Total;
spList.Add(speakerRes.SpeakerIndex); spList.Add(speakerRes.SpeakerIndex);
break;
} }
} }
results.Add(segment, spList); var sp = spList.Distinct().ToList();
if(sp.Count>0)
results.Add(segment, sp);
} }
//拼接 提示词字幕源 //拼接 提示词字幕源
var stringBuilder = new StringBuilder(); var stringBuilder = new StringBuilder();
@ -182,8 +183,6 @@ namespace VideoAnalysisCore.Common
stringBuilder.Append(item.Key.Text); stringBuilder.Append(item.Key.Text);
stringBuilder.Append("|"); stringBuilder.Append("|");
} }
//todo 分析上课时间段情况 分析 独立学习 小组合作 随堂练习等情况
return new TotalCaptionsDto return new TotalCaptionsDto
{ {
StudentSpeaking = studentSpeaking, StudentSpeaking = studentSpeaking,
@ -193,7 +192,8 @@ namespace VideoAnalysisCore.Common
{ {
Start = s.Key.Start.TotalSeconds, Start = s.Key.Start.TotalSeconds,
End = s.Key.End.TotalSeconds, End = s.Key.End.TotalSeconds,
Type = s.Value.Count == 1 && s.Value.First() == techerId Content = s.Key.Text,
TimeBaseType = s.Value.Count == 1 && s.Value.First() == techerId
? TimeBaseTypeEnum. ? TimeBaseTypeEnum.
: TimeBaseTypeEnum. : TimeBaseTypeEnum.
}) })

View File

@ -0,0 +1,20 @@
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace VideoAnalysisCore.Enum
{
/// <summary>
/// Questions put to the GPT model. Each member carries its question text in
/// <c>DisplayAttribute.Prompt</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the member identifiers are missing in this extract (only "= value" is
/// visible); restore the original names before compiling.
/// </remarks>
enum QuestionTypeEnum
{
// Question 100 — the 10 most frequent words used in the lesson.
[Display(Prompt = "分析授课中使用的高频词10个")]
=100,
// Question 101 — an overview of the subtitles for every 5 minutes.
// NOTE(review): the prompt's "返回结构 array[0]= " part looks truncated — confirm against the original file.
[Display(Prompt = "分析字幕中每5分钟的一个概览 返回结构 array[0]= ")]
= 101,
// Question 102 — count how often each teacher questioning type listed in the prompt occurs.
[Display(Prompt = "统计授课中教师提问类型的次数 类型重复回答,老师追问,简单性表演,老师补充答案,表扬并补充答案")]
= 102,
}
}

View File

@ -20,9 +20,13 @@ namespace VideoAnalysisCore.Model.Dto
/// </summary> /// </summary>
public double End { get; set; } public double End { get; set; }
/// <summary> /// <summary>
/// 字幕内容
/// </summary>
public string Content { get; set; }
/// <summary>
/// 时间段 类型 /// 时间段 类型
/// </summary> /// </summary>
public TimeBaseTypeEnum Type { get; set; } public TimeBaseTypeEnum TimeBaseType { get; set; }
} }
public class TotalCaptionsDto public class TotalCaptionsDto