优化 复习课切题提示词

This commit is contained in:
小肥羊 2025-06-04 17:44:33 +08:00
parent dd7e217bb2
commit 02518a1c4f
9 changed files with 174 additions and 76 deletions

View File

@ -154,7 +154,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var messageBuilder1 = new StringBuilder(); var messageBuilder1 = new StringBuilder();
var lastChat = new ChatResSSE(); var lastChat = new ChatResSSE();
var splitCount = "data:".Length; var splitCount = "data:".Length;
var maxLoop = 60*1000; var maxLoop = 60*10000;
int threshold = 0; int threshold = 0;
while (maxLoop>0) while (maxLoop>0)
{ {
@ -205,6 +205,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
} }
} }
} }
Console.WriteLine(DateTime.Now + "=>AI请求超时 " + chatReq.taskId);
return null; return null;
} }

View File

@ -38,7 +38,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
/// 一种替代温度采样的方法,称为原子核采样, 其中模型考虑具有top_p概率的标记的结果 质量。所以 0.1 表示仅包含前 10% 概率质量的代币 被考 /// 一种替代温度采样的方法,称为原子核采样, 其中模型考虑具有top_p概率的标记的结果 质量。所以 0.1 表示仅包含前 10% 概率质量的代币 被考
/// <para>建议与<see cref="ChatRequest.temperature"/>联动</para> /// <para>建议与<see cref="ChatRequest.temperature"/>联动</para>
/// </summary> /// </summary>
public float top_p { get; set; } = 0.5f; public float top_p { get; set; } = 0.1f;
/// <summary> /// <summary>
/// 一个对象,用于指定模型必须输出的格式。设置为 enable 结构化输出,确保模型与您提供的 JSON 匹配 图式。 /// 一个对象,用于指定模型必须输出的格式。设置为 enable 结构化输出,确保模型与您提供的 JSON 匹配 图式。
/// </summary> /// </summary>

View File

@ -18,6 +18,7 @@ using static System.Collections.Specialized.BitVector32;
using FFmpeg.NET.Services; using FFmpeg.NET.Services;
using Aliyun.OSS; using Aliyun.OSS;
using Yitter.IdGenerator; using Yitter.IdGenerator;
using VideoAnalysisCore.Common.Expand;
namespace VideoAnalysisCore.AICore.GPT.DeepSeek namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{ {
@ -172,10 +173,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{ {
//校验结果质量 //校验结果质量
var thems = JsonSerializer.Serialize(questionRes.Adapt<VideoKnowQueryDto[]>()); var thems = JsonSerializer.Serialize(questionRes.Adapt<VideoKnowQueryDto[]>());
var pptFormat = taskInfo.VideoType==AttachmentsInfoType.PPT
? "这堂课是习题课,所讲解内容都是试题。"
: string.Empty;
var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}"""; var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}""";
var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" + var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" +
$"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" + $"检查这些分段的时间是否合理 与相邻的时间段间隔是否大于30秒?" +
$"分段的主题内容,知识点分配是否合理符合实际吗?" + $"分段的主题内容,知识点分配是否合理符合实际吗?" +
$"{pptFormat}" +
$"请给出你的打分(0-100,70分及格)以及打分原因。" + $"请给出你的打分(0-100,70分及格)以及打分原因。" +
$"这是我的分段 {thems}。" + $"这是我的分段 {thems}。" +
$"后续的内容是包含时间戳的视频字幕的固定格式文本。" + $"后续的内容是包含时间戳的视频字幕的固定格式文本。" +
@ -257,33 +262,39 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
TotalCaptionsDto captions, string sections) TotalCaptionsDto captions, string sections)
{ {
var tryCount = 10; var tryCount = 10;
while (tryCount-->10) while (tryCount-->0)
{ {
try try
{ {
var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode) var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode)
? string.Empty ? string.Empty
: $"视频授课内容发生了变化的时间节点{taskInfo.PPTKeyFrame},授课阶段应当在附近时间发生变化。"; : $"图像视频中授课内容PPT发生了变化的时间节点是{taskInfo.PPTKeyFrame},授课阶段结果可以参考这些时间节点。";
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]"""; var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]""";
var exerciseClass = taskInfo?.VideoType == AttachmentsInfoType.Review var exerciseClass = taskInfo?.VideoType == AttachmentsInfoType.Review
? $"本堂课是习题课,每个阶段因当是不同得例题讲解片段,并且所有的授课阶段都视为例题精讲。" ? $"但是本堂课是习题课,所以每个阶段是不同的例题讲解内容。"
: $"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。" + : string.Empty;
$"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" +
$"通过生成的内容总结分析出对应的授课阶段主题。 ";
//$"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。"; //$"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。";
var postMessages = var postMessages =
$"请通过视频字幕内容分析出视频中{taskInfo.Subject}课堂的授课阶段。" + $"请通过视频字幕内容分析出视频中课堂的授课阶段。" +
$"课堂内容与{sections}章节相关。" + $"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。" +
$"{keyFrameArr}" + $"{keyFrameArr}" +
$"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。"+
$"{exerciseClass}" + $"{exerciseClass}" +
$"最后请检查每个授课阶段的时长,不允许出现超出800秒或者低于50秒的授课阶段。" + $"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" +
$"通过生成的内容总结分析出对应的授课阶段主题。 "+
$"最后请检查每个授课阶段的时长,不允许出现低于50秒的阶段。" +
$"输出内容只返回json格式({resFormat})" + $"输出内容只返回json格式({resFormat})" +
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" + $"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" +
$"字幕列表 {captions.Captions} 字幕结束!"; $"字幕列表 {captions.Captions} 字幕结束!";
Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id.ToString()}.开始分析视频内容 {tryCount}"); Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id.ToString()}.开始分析视频内容 {tryCount}");
questionRes.AddRange(await ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕")); var resData = await ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
if (taskInfo?.VideoType == AttachmentsInfoType.Review)
foreach (var item in resData)
item.Stage = StageEnum..ToString();
questionRes.AddRange(resData);
break;
} }
catch (Exception ex) catch (Exception ex)
{ {
@ -305,6 +316,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{ {
taskId = task, taskId = task,
model = model, model = model,
max_tokens= model== "deepseek-reasoner"?16000:8000,
stream = true, stream = true,
temperature = 0.2f, temperature = 0.2f,
messages = messageArr messages = messageArr
@ -325,13 +337,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning }; redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning };
RedisExpand.SetTaskGPTCached(task, time, redisCached); RedisExpand.SetTaskGPTCached(task, time, redisCached);
} }
chatResContent = chatResContent?.Replace("字幕内容", "课堂情况"); chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault();
chatResContent = chatResContent?.Replace("\n", ""); chatResContent = chatResContent?.Replace("\n", "");
chatResContent = chatResContent?.Replace("```json", ""); chatResContent = chatResContent?.Replace("```json", "");
chatResContent = chatResContent?.Replace("```", ""); chatResContent = chatResContent?.Replace("```", "");
chatResContent = chatResContent?.Replace("}{", "},{"); chatResContent = chatResContent?.Replace("}{", "},{");
chatResContent = chatResContent?.Replace("}|{", "},{"); chatResContent = chatResContent?.Replace("}|{", "},{");
chatResContent = chatResContent?.Trim().ExtractJson().FirstOrDefault(); chatResContent = chatResContent?.Trim();
var startsStr = typeof(T).IsArray ? "[" : "{"; var startsStr = typeof(T).IsArray ? "[" : "{";
var endStr = typeof(T).IsArray ? "]" : "}"; var endStr = typeof(T).IsArray ? "]" : "}";
@ -393,17 +405,21 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
.ProcessImageAsync(new SimpleTexOcrRequest(filePath)); .ProcessImageAsync(new SimpleTexOcrRequest(filePath));
if (!sRes.Success) if (!sRes.Success)
continue; continue;
var knowArr=string.Join(',', knowInfoArr.Select(s => s.KnowPointId + "|" + s.KnowPoint )); if(sRes.Result.res.value.Trim().Length<10)//总试题内容长度小于10 视为无效题目
break;
Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.First().StartTime}秒试题的试题内容");
Console.WriteLine( sRes.Result.res.value);
var knowArr=JsonSerializer.Serialize(knowInfoArr.Select(s => new { s.KnowPointId, s.KnowPoint }));
var resFormat = """[{"TopicStem":string(试题题干),"Question:string()","KnowPointId":(string)知识点ID}]"""; var resFormat = """[{"TopicStem":string(试题题干),"Question:string()","KnowPointId":(string)知识点ID}]""";
var postMessages = var postMessages =
$"提供一段内容是md格式的试题内容字符串。" + $"我将提供一段内容是md格式的试题内容字符串。" +
$"请提取出其中的试题内容。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" + $"请提取出其中的试题内容以及每个试题的问题。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" +
$"知识点格式(知识点ID|知识点名称)范围[{knowArr}]。" + $"知识点格式(json格式)范围[{knowArr}]。" +
$"排除不是试题内容的文字,优化公式排版并且去除题号。" + $"排除不是试题内容的文字,优化公式排版并且去除题号。" +
$"如果存在多道大题,请帮忙拆分开!" + $"如果存在多道题(或者小问),则需要拆分成为多个试题对象!" +
$"输出内容只返回json格式为({resFormat})" + $"输出内容只返回json格式为({resFormat})" +
$"以下是试题内容" + $"以下是试题内容" +
$"`{sRes.Result.res.info.markdown}`"; $"`{sRes.Result.res.value}`";
var resData = await ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题", "deepseek-chat"); var resData = await ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题", "deepseek-chat");
foreach (var q in resData) foreach (var q in resData)
{ {

View File

@ -102,50 +102,6 @@ namespace VideoAnalysisCore.Common
return Path.Combine(task.Id.ToString().LocalPath(), $"{FrameName}{(fTime / 5).ToString().PadLeft(3,'0')}.jpg"); return Path.Combine(task.Id.ToString().LocalPath(), $"{FrameName}{(fTime / 5).ToString().PadLeft(3,'0')}.jpg");
} }
/// <summary>
/// Scans a string and collects every bracket-balanced span that begins with
/// '{' or '['. Balancing is purely a depth count: curly and square brackets
/// are treated as interchangeable, and brackets inside quoted text are
/// counted as well. The spans are not checked for being valid JSON.
/// </summary>
/// <param name="input">Text to scan.</param>
/// <returns>The balanced spans in order of appearance; empty when none are found.</returns>
public static List<string> ExtractJson(this string input)
{
    var found = new List<string>();
    var pos = 0;
    while (pos < input.Length)
    {
        var head = input[pos];
        if (head != '{' && head != '[')
        {
            pos++;
            continue;
        }

        // Remember where the span opened, then walk forward until depth returns to zero.
        var begin = pos;
        var depth = 1;
        for (pos = begin + 1; pos < input.Length && depth > 0; pos++)
        {
            switch (input[pos])
            {
                case '{':
                case '[':
                    depth++;
                    break;
                case '}':
                case ']':
                    depth--;
                    break;
            }
        }

        // An unbalanced opener consumes the rest of the input and yields nothing.
        if (depth == 0)
        {
            found.Add(input.Substring(begin, pos - begin));
        }
    }
    return found;
}
/// <summary> /// <summary>
/// 处理数学公式 /// 处理数学公式
/// </summary> /// </summary>

View File

@ -4,6 +4,7 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using System.Threading.Tasks; using System.Threading.Tasks;
using VideoAnalysisCore.Common.Expand;
namespace VideoAnalysisCore.Common namespace VideoAnalysisCore.Common
{ {

View File

@ -10,9 +10,9 @@ using AlibabaCloud.TeaUtil.Models;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using VideoAnalysisCore.Job; using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common namespace VideoAnalysisCore.Common.Expand
{ {
public class AlibabaCloudVodConfig public class AlibabaCloudVodConfig
{ {
/// <summary> /// <summary>
/// id /// id

View File

@ -18,7 +18,7 @@ using Aliyun.Credentials.Models;
using VideoAnalysisCore.Model.Dto; using VideoAnalysisCore.Model.Dto;
using System.IO; using System.IO;
namespace VideoAnalysisCore.Common namespace VideoAnalysisCore.Common.Expand
{ {
public class AliyunOSSConfig public class AliyunOSSConfig
{ {
@ -65,13 +65,13 @@ namespace VideoAnalysisCore.Common
// 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。 // 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
AccessKeySecret = AppCommon.Config.AliyunOSS.AccessKeySecret, AccessKeySecret = AppCommon.Config.AliyunOSS.AccessKeySecret,
Endpoint = AppCommon.Config.AliyunOSS.Endpoint, Endpoint = AppCommon.Config.AliyunOSS.Endpoint,
Region= AppCommon.Config.AliyunOSS.Region, Region = AppCommon.Config.AliyunOSS.Region,
};// 创建ClientConfiguration实例按照您的需要修改默认参数。 };// 创建ClientConfiguration实例按照您的需要修改默认参数。
var conf = new ClientConfiguration(); var conf = new ClientConfiguration();
// 设置v4签名。 // 设置v4签名。
conf.SignatureVersion = SignatureVersion.V4; conf.SignatureVersion = SignatureVersion.V4;
// 创建OssClient实例。 // 创建OssClient实例。
var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf); var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf);
oss.SetRegion(config.Region); oss.SetRegion(config.Region);
service.AddSingleton(oss); service.AddSingleton(oss);
@ -81,7 +81,7 @@ namespace VideoAnalysisCore.Common
/// </summary> /// </summary>
/// <param name="oss"></param> /// <param name="oss"></param>
/// <param name="fileArr">视频实体片段</param> /// <param name="fileArr">视频实体片段</param>
public static void AddVideoQuestionUrl(this OssClient oss, List<VideoQuestionOSSDto> fileArr ) public static void AddVideoQuestionUrl(this OssClient oss, List<VideoQuestionOSSDto> fileArr)
{ {
var cached = new HashSet<string>(); var cached = new HashSet<string>();
foreach (var item in fileArr) foreach (var item in fileArr)
@ -89,7 +89,7 @@ namespace VideoAnalysisCore.Common
try try
{ {
var path = item.VideoTaskId.ToString() + "/" + Path.GetFileName(item.FilePath); var path = item.VideoTaskId.ToString() + "/" + Path.GetFileName(item.FilePath);
if (cached.Contains(item.FilePath)) if (cached.Contains(item.FilePath))
{ {
item.ImageUrl = AppCommon.Config.AliyunOSS.BucketDomain + "/" + path; item.ImageUrl = AppCommon.Config.AliyunOSS.BucketDomain + "/" + path;
continue; continue;

View File

@ -7,6 +7,8 @@ using System.Net.Http.Json;
using System.Net.Sockets; using System.Net.Sockets;
using System.Security.Cryptography; using System.Security.Cryptography;
using System.Text; using System.Text;
using System.Text.Json.Serialization;
using System.Text.Json;
using System.Threading.Tasks; using System.Threading.Tasks;
using AlibabaCloud.OpenApiClient.Models; using AlibabaCloud.OpenApiClient.Models;
using AlibabaCloud.SDK.Vod20170321; using AlibabaCloud.SDK.Vod20170321;
@ -15,10 +17,9 @@ using AlibabaCloud.TeaUtil.Models;
using Azure; using Azure;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using VideoAnalysisCore.Job; using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common namespace VideoAnalysisCore.Common.Expand
{ {
@ -32,7 +33,10 @@ namespace VideoAnalysisCore.Common
public class SimpleTexOcrResponseDataRes public class SimpleTexOcrResponseDataRes
{ {
public string type { get; set; } public string type { get; set; }
public SimpleTexOcrResponseDataInfo info { get; set; }
[JsonPropertyName("info")] // 替换为实际字段名
public JsonElement DataInfo { get; set; } // 使用JsonElement接收未知类型
public string value { get; set; }
} }
public class SimpleTexOcrResponseDataInfo public class SimpleTexOcrResponseDataInfo
@ -81,6 +85,7 @@ namespace VideoAnalysisCore.Common
{ {
public bool Success { get; set; } public bool Success { get; set; }
public SimpleTexOcrResponseData Result { get; set; } public SimpleTexOcrResponseData Result { get; set; }
public string ResultStr { get; set; }
public string Error { get; set; } public string Error { get; set; }
} }
@ -123,7 +128,7 @@ namespace VideoAnalysisCore.Common
if (request.isolated_formula_wrapper != null) if (request.isolated_formula_wrapper != null)
{ {
var isolatedWrapper = JsonConvert.SerializeObject(request.isolated_formula_wrapper); var isolatedWrapper = JsonSerializer.Serialize(request.isolated_formula_wrapper);
content.Add(new StringContent(isolatedWrapper), nameof(request.isolated_formula_wrapper)); content.Add(new StringContent(isolatedWrapper), nameof(request.isolated_formula_wrapper));
parameters[nameof(request.isolated_formula_wrapper)] = isolatedWrapper; parameters[nameof(request.isolated_formula_wrapper)] = isolatedWrapper;
} }
@ -161,11 +166,25 @@ namespace VideoAnalysisCore.Common
var response = await client.SendAsync(requestMessage); var response = await client.SendAsync(requestMessage);
var resStr = await response.Content.ReadAsStringAsync(); var resStr = await response.Content.ReadAsStringAsync();
var responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>(); var responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>();
if (responseContent.res.DataInfo.ValueKind == JsonValueKind.Object)
{
responseContent.res.value = JsonSerializer.Deserialize<SimpleTexOcrResponseDataInfo>(
responseContent.res.DataInfo.GetRawText(),
new JsonSerializerOptions { PropertyNameCaseInsensitive = true }
)?.markdown??string.Empty;
// 处理字符串
}
else if (responseContent.res.DataInfo.ValueKind == JsonValueKind.String)
{
responseContent.res.value = responseContent.res.DataInfo.GetString();
}
request.file.Dispose(); request.file.Dispose();
return new SimpleTexOcrResponse return new SimpleTexOcrResponse
{ {
Success = response.IsSuccessStatusCode, Success = response.IsSuccessStatusCode,
Result = responseContent, Result = responseContent,
ResultStr= resStr,
Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}" Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}"
}; };
} }

View File

@ -0,0 +1,105 @@
using System;
using System.Collections.Generic;
using System.Text.Json;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Helpers for pulling JSON objects and arrays out of free-form text
/// that has other characters around the JSON.
/// </summary>
public static class JsonExtractor
{
    /// <summary>
    /// Extracts every substring of <paramref name="input"/> that starts with
    /// '{' or '[', has correctly nested brackets, and parses as JSON.
    /// </summary>
    /// <param name="input">Text to scan. Null or empty yields an empty list.</param>
    /// <returns>
    /// The JSON substrings in order of appearance. A span that was accepted is
    /// skipped over as a whole, so JSON nested inside it is not reported again.
    /// </returns>
    public static List<string> ExtractJsonStrings(this string input)
    {
        List<string> jsonList = new List<string>();
        if (string.IsNullOrEmpty(input))
        {
            return jsonList;
        }

        int index = 0;
        while (index < input.Length)
        {
            if (input[index] == '{' || input[index] == '[')
            {
                int? endIndex = FindMatchingBracket(input, index);
                if (endIndex.HasValue)
                {
                    string candidate = input.Substring(index, endIndex.Value - index + 1);
                    if (IsValidJson(candidate))
                    {
                        jsonList.Add(candidate);
                        // Resume right after the accepted span.
                        index = endIndex.Value + 1;
                        continue;
                    }
                }
            }

            // Not an opener, or the candidate was rejected: retry from the next character.
            index++;
        }
        return jsonList;
    }

    /// <summary>
    /// Finds the index of the bracket that closes the one at <paramref name="start"/>.
    /// Brackets inside double-quoted strings are ignored, and a backslash inside
    /// a string escapes the character that follows it.
    /// </summary>
    /// <param name="str">Text being scanned.</param>
    /// <param name="start">Index at which scanning begins; the caller passes the index of an opening bracket.</param>
    /// <returns>
    /// The index of the closing bracket, or null when a closing bracket does not
    /// match the innermost opener or the text ends before the nesting is closed.
    /// </returns>
    private static int? FindMatchingBracket(string str, int start)
    {
        Stack<char> stack = new Stack<char>();
        bool inString = false;
        bool inEscape = false;
        for (int i = start; i < str.Length; i++)
        {
            char c = str[i];
            if (inEscape)
            {
                // The escaped character is consumed without interpretation.
                inEscape = false;
            }
            else if (inString)
            {
                if (c == '\\')
                    inEscape = true;
                else if (c == '"')
                    inString = false;
            }
            else
            {
                switch (c)
                {
                    case '{':
                    case '[':
                        stack.Push(c);
                        break;
                    case '}':
                        if (stack.Count == 0 || stack.Peek() != '{')
                            return null;
                        stack.Pop();
                        break;
                    case ']':
                        if (stack.Count == 0 || stack.Peek() != '[')
                            return null;
                        stack.Pop();
                        break;
                    case '"':
                        inString = true;
                        break;
                }
            }

            // Every opener has been closed: this character ends the span.
            if (stack.Count == 0)
                return i;
        }
        return null; // brackets were never fully closed
    }

    /// <summary>
    /// Reports whether <paramref name="candidate"/> parses as a JSON document.
    /// </summary>
    /// <param name="candidate">Text to validate.</param>
    /// <returns>True when parsing succeeds; false for null, empty, or unparsable text.</returns>
    public static bool IsValidJson(string candidate)
    {
        if (string.IsNullOrEmpty(candidate))
            return false;
        try
        {
            // JsonDocument rents pooled buffers; dispose it so they are returned.
            using (JsonDocument.Parse(candidate))
            {
                return true;
            }
        }
        catch (JsonException)
        {
            return false;
        }
        catch (ArgumentException)
        {
            // Raised when the text cannot be transcoded to UTF-8 (e.g. a lone surrogate).
            return false;
        }
    }
}
}