优化 复习课切题提示词

This commit is contained in:
小肥羊 2025-06-04 17:44:33 +08:00
parent dd7e217bb2
commit 02518a1c4f
9 changed files with 174 additions and 76 deletions

View File

@ -154,7 +154,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var messageBuilder1 = new StringBuilder();
var lastChat = new ChatResSSE();
var splitCount = "data:".Length;
var maxLoop = 60*1000;
var maxLoop = 60*10000;
int threshold = 0;
while (maxLoop>0)
{
@ -205,6 +205,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
}
}
}
Console.WriteLine(DateTime.Now + "=>AI请求超时 " + chatReq.taskId);
return null;
}

View File

@ -38,7 +38,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
/// 一种替代温度采样的方法,称为原子核采样, 其中模型考虑具有top_p概率的标记的结果 质量。所以 0.1 表示仅包含前 10% 概率质量的代币 被考
/// <para>建议与<see cref="ChatRequest.temperature"/>联动</para>
/// </summary>
public float top_p { get; set; } = 0.5f;
public float top_p { get; set; } = 0.1f;
/// <summary>
/// 一个对象,用于指定模型必须输出的格式。设置为 enable 结构化输出,确保模型与您提供的 JSON 匹配 图式。
/// </summary>

View File

@ -18,6 +18,7 @@ using static System.Collections.Specialized.BitVector32;
using FFmpeg.NET.Services;
using Aliyun.OSS;
using Yitter.IdGenerator;
using VideoAnalysisCore.Common.Expand;
namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{
@ -172,10 +173,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{
//校验结果质量
var thems = JsonSerializer.Serialize(questionRes.Adapt<VideoKnowQueryDto[]>());
var pptFormat = taskInfo.VideoType==AttachmentsInfoType.PPT
? "这堂课是习题课,所讲解内容都是试题。"
: string.Empty;
var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}""";
var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" +
$"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" +
$"检查这些分段的时间是否合理 与相邻的时间段间隔是否大于30秒?" +
$"分段的主题内容,知识点分配是否合理符合实际吗?" +
$"{pptFormat}" +
$"请给出你的打分(0-100,70分及格)以及打分原因。" +
$"这是我的分段 {thems}。" +
$"后续的内容是包含时间戳的视频字幕的固定格式文本。" +
@ -257,33 +262,39 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
TotalCaptionsDto captions, string sections)
{
var tryCount = 10;
while (tryCount-->10)
while (tryCount-->0)
{
try
{
var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode)
? string.Empty
: $"视频授课内容发生了变化的时间节点{taskInfo.PPTKeyFrame},授课阶段应当在附近时间发生变化。";
: $"图像视频中授课内容PPT发生了变化的时间节点是{taskInfo.PPTKeyFrame},授课阶段结果可以参考这些时间节点。";
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]""";
var exerciseClass = taskInfo?.VideoType == AttachmentsInfoType.Review
? $"本堂课是习题课,每个阶段因当是不同得例题讲解片段,并且所有的授课阶段都视为例题精讲。"
: $"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。" +
$"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" +
$"通过生成的内容总结分析出对应的授课阶段主题。 ";
? $"但是本堂课是习题课,所以每个阶段是不同的例题讲解内容。"
: string.Empty;
//$"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。";
var postMessages =
$"请通过视频字幕内容分析出视频中{taskInfo.Subject}课堂的授课阶段。" +
$"课堂内容与{sections}章节相关。" +
$"请通过视频字幕内容分析出视频中课堂的授课阶段。" +
$"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。" +
$"{keyFrameArr}" +
$"完整的课堂标准流程包含以下5个阶段课程引入/新知讲解/例题精讲/课堂练习/知识总结。"+
$"{exerciseClass}" +
$"最后请检查每个授课阶段的时长,不允许出现超出800秒或者低于50秒的授课阶段。" +
$"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" +
$"通过生成的内容总结分析出对应的授课阶段主题。 "+
$"最后请检查每个授课阶段的时长,不允许出现低于50秒的阶段。" +
$"输出内容只返回json格式({resFormat})" +
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" +
$"字幕列表 {captions.Captions} 字幕结束!";
Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id.ToString()}.开始分析视频内容 {tryCount}");
questionRes.AddRange(await ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕"));
var resData = await ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
if (taskInfo?.VideoType == AttachmentsInfoType.Review)
foreach (var item in resData)
item.Stage = StageEnum..ToString();
questionRes.AddRange(resData);
break;
}
catch (Exception ex)
{
@ -305,6 +316,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
{
taskId = task,
model = model,
max_tokens= model== "deepseek-reasoner"?16000:8000,
stream = true,
temperature = 0.2f,
messages = messageArr
@ -325,13 +337,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning };
RedisExpand.SetTaskGPTCached(task, time, redisCached);
}
chatResContent = chatResContent?.Replace("字幕内容", "课堂情况");
chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault();
chatResContent = chatResContent?.Replace("\n", "");
chatResContent = chatResContent?.Replace("```json", "");
chatResContent = chatResContent?.Replace("```", "");
chatResContent = chatResContent?.Replace("}{", "},{");
chatResContent = chatResContent?.Replace("}|{", "},{");
chatResContent = chatResContent?.Trim().ExtractJson().FirstOrDefault();
chatResContent = chatResContent?.Trim();
var startsStr = typeof(T).IsArray ? "[" : "{";
var endStr = typeof(T).IsArray ? "]" : "}";
@ -393,17 +405,21 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
.ProcessImageAsync(new SimpleTexOcrRequest(filePath));
if (!sRes.Success)
continue;
var knowArr=string.Join(',', knowInfoArr.Select(s => s.KnowPointId + "|" + s.KnowPoint ));
if(sRes.Result.res.value.Trim().Length<10)//总试题内容长度小于10 视为无效题目
break;
Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.First().StartTime}秒试题的试题内容");
Console.WriteLine( sRes.Result.res.value);
var knowArr=JsonSerializer.Serialize(knowInfoArr.Select(s => new { s.KnowPointId, s.KnowPoint }));
var resFormat = """[{"TopicStem":string(试题题干),"Question:string()","KnowPointId":(string)知识点ID}]""";
var postMessages =
$"提供一段内容是md格式的试题内容字符串。" +
$"请提取出其中的试题内容。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" +
$"知识点格式(知识点ID|知识点名称)范围[{knowArr}]。" +
$"我将提供一段内容是md格式的试题内容字符串。" +
$"请提取出其中的试题内容以及每个试题的问题。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" +
$"知识点格式(json格式)范围[{knowArr}]。" +
$"排除不是试题内容的文字,优化公式排版并且去除题号。" +
$"如果存在多道大题,请帮忙拆分开!" +
$"如果存在多道题(或者小问),则需要拆分成为多个试题对象!" +
$"输出内容只返回json格式为({resFormat})" +
$"以下是试题内容" +
$"`{sRes.Result.res.info.markdown}`";
$"`{sRes.Result.res.value}`";
var resData = await ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题", "deepseek-chat");
foreach (var q in resData)
{

View File

@ -102,50 +102,6 @@ namespace VideoAnalysisCore.Common
return Path.Combine(task.Id.ToString().LocalPath(), $"{FrameName}{(fTime / 5).ToString().PadLeft(3,'0')}.jpg");
}
/// <summary>
/// Scans a string for bracket-balanced segments that look like JSON.
/// </summary>
/// <param name="input">Text to scan.</param>
/// <returns>
/// Every segment that starts at '{' or '[' and ends where the count of opening
/// and closing brackets returns to zero, in order of appearance.
/// </returns>
public static List<string> ExtractJson(this string input)
{
    var found = new List<string>();
    var pos = 0;
    while (pos < input.Length)
    {
        var current = input[pos];
        if (current != '{' && current != '[')
        {
            pos++;
            continue;
        }

        var begin = pos;
        var depth = 1;
        // Brackets are counted regardless of kind and regardless of whether
        // they sit inside a quoted string.
        for (pos = begin + 1; pos < input.Length && depth > 0; pos++)
        {
            switch (input[pos])
            {
                case '{':
                case '[':
                    depth++;
                    break;
                case '}':
                case ']':
                    depth--;
                    break;
            }
        }

        // An unbalanced opener consumes the rest of the input and yields nothing.
        if (depth == 0)
        {
            found.Add(input.Substring(begin, pos - begin));
        }
    }
    return found;
}
/// <summary>
/// 处理数学公式
/// </summary>

View File

@ -4,6 +4,7 @@ using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using VideoAnalysisCore.Common.Expand;
namespace VideoAnalysisCore.Common
{

View File

@ -10,9 +10,9 @@ using AlibabaCloud.TeaUtil.Models;
using Microsoft.Extensions.DependencyInjection;
using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common
namespace VideoAnalysisCore.Common.Expand
{
public class AlibabaCloudVodConfig
public class AlibabaCloudVodConfig
{
/// <summary>
/// id

View File

@ -18,7 +18,7 @@ using Aliyun.Credentials.Models;
using VideoAnalysisCore.Model.Dto;
using System.IO;
namespace VideoAnalysisCore.Common
namespace VideoAnalysisCore.Common.Expand
{
public class AliyunOSSConfig
{
@ -65,13 +65,13 @@ namespace VideoAnalysisCore.Common
// 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
AccessKeySecret = AppCommon.Config.AliyunOSS.AccessKeySecret,
Endpoint = AppCommon.Config.AliyunOSS.Endpoint,
Region= AppCommon.Config.AliyunOSS.Region,
Region = AppCommon.Config.AliyunOSS.Region,
};// 创建ClientConfiguration实例按照您的需要修改默认参数。
var conf = new ClientConfiguration();
// 设置v4签名。
conf.SignatureVersion = SignatureVersion.V4;
// 创建OssClient实例。
var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf);
var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf);
oss.SetRegion(config.Region);
service.AddSingleton(oss);
@ -81,7 +81,7 @@ namespace VideoAnalysisCore.Common
/// </summary>
/// <param name="oss"></param>
/// <param name="fileArr">视频实体片段</param>
public static void AddVideoQuestionUrl(this OssClient oss, List<VideoQuestionOSSDto> fileArr )
public static void AddVideoQuestionUrl(this OssClient oss, List<VideoQuestionOSSDto> fileArr)
{
var cached = new HashSet<string>();
foreach (var item in fileArr)
@ -89,7 +89,7 @@ namespace VideoAnalysisCore.Common
try
{
var path = item.VideoTaskId.ToString() + "/" + Path.GetFileName(item.FilePath);
if (cached.Contains(item.FilePath))
if (cached.Contains(item.FilePath))
{
item.ImageUrl = AppCommon.Config.AliyunOSS.BucketDomain + "/" + path;
continue;

View File

@ -7,6 +7,8 @@ using System.Net.Http.Json;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json.Serialization;
using System.Text.Json;
using System.Threading.Tasks;
using AlibabaCloud.OpenApiClient.Models;
using AlibabaCloud.SDK.Vod20170321;
@ -15,10 +17,9 @@ using AlibabaCloud.TeaUtil.Models;
using Azure;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common
namespace VideoAnalysisCore.Common.Expand
{
@ -32,7 +33,10 @@ namespace VideoAnalysisCore.Common
// Payload object of a SimpleTex OCR response: a type tag, the raw "info" field and a text value.
public class SimpleTexOcrResponseDataRes
{
// Type tag of the result, as returned by the service.
public string type { get; set; }
// NOTE(review): this strongly-typed property and DataInfo below both target the JSON name "info";
// in this diff view this line looks like the pre-change version — confirm only one of them remains.
public SimpleTexOcrResponseDataInfo info { get; set; }
[JsonPropertyName("info")] // replace with the actual field name
public JsonElement DataInfo { get; set; } // JsonElement accepts a value whose type is not known in advance
// Text of the result; the OCR client code in this file fills it from DataInfo
// (the markdown of an object payload, or the string itself for a string payload).
public string value { get; set; }
}
public class SimpleTexOcrResponseDataInfo
@ -81,6 +85,7 @@ namespace VideoAnalysisCore.Common
{
public bool Success { get; set; }
public SimpleTexOcrResponseData Result { get; set; }
public string ResultStr { get; set; }
public string Error { get; set; }
}
@ -123,7 +128,7 @@ namespace VideoAnalysisCore.Common
if (request.isolated_formula_wrapper != null)
{
var isolatedWrapper = JsonConvert.SerializeObject(request.isolated_formula_wrapper);
var isolatedWrapper = JsonSerializer.Serialize(request.isolated_formula_wrapper);
content.Add(new StringContent(isolatedWrapper), nameof(request.isolated_formula_wrapper));
parameters[nameof(request.isolated_formula_wrapper)] = isolatedWrapper;
}
@ -161,11 +166,25 @@ namespace VideoAnalysisCore.Common
var response = await client.SendAsync(requestMessage);
var resStr = await response.Content.ReadAsStringAsync();
var responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>();
if (responseContent.res.DataInfo.ValueKind == JsonValueKind.Object)
{
responseContent.res.value = JsonSerializer.Deserialize<SimpleTexOcrResponseDataInfo>(
responseContent.res.DataInfo.GetRawText(),
new JsonSerializerOptions { PropertyNameCaseInsensitive = true }
)?.markdown??string.Empty;
// 处理字符串
}
else if (responseContent.res.DataInfo.ValueKind == JsonValueKind.String)
{
responseContent.res.value = responseContent.res.DataInfo.GetString();
}
request.file.Dispose();
return new SimpleTexOcrResponse
{
Success = response.IsSuccessStatusCode,
Result = responseContent,
ResultStr= resStr,
Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}"
};
}

View File

@ -0,0 +1,105 @@
using System;
using System.Collections.Generic;
using System.Text.Json;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Helpers for pulling well-formed JSON fragments out of free-form text.
/// </summary>
public static class JsonExtractor
{
    /// <summary>
    /// Extracts the JSON objects and arrays embedded in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Text that may contain JSON; null or empty yields an empty list.</param>
    /// <returns>
    /// Substrings that start at '{' or '[', end at the matching bracket and parse as JSON,
    /// in order of appearance. Once a fragment is accepted, scanning resumes after it, so
    /// fragments nested inside an accepted one are not returned separately. Never null.
    /// </returns>
    public static List<string> ExtractJsonStrings(this string input)
    {
        List<string> jsonList = new List<string>();
        if (string.IsNullOrEmpty(input))
        {
            return jsonList;
        }

        int index = 0;
        while (index < input.Length)
        {
            if (input[index] == '{' || input[index] == '[')
            {
                int? endIndex = FindMatchingBracket(input, index);
                if (endIndex.HasValue)
                {
                    string candidate = input.Substring(index, endIndex.Value - index + 1);
                    if (IsValidJson(candidate))
                    {
                        jsonList.Add(candidate);
                        index = endIndex.Value + 1;
                        continue;
                    }
                }
            }
            // Not an opener, or the candidate was rejected: move one character on so
            // that brackets nested inside a rejected candidate still get their turn.
            index++;
        }
        return jsonList;
    }

    /// <summary>
    /// Finds the index of the bracket that closes the one at <paramref name="start"/>.
    /// </summary>
    /// <param name="str">Text being scanned.</param>
    /// <param name="start">Index of the opening '{' or '['.</param>
    /// <returns>
    /// Index of the closing bracket, or null when a closing bracket of the wrong kind
    /// is met or the text ends before every bracket is closed.
    /// </returns>
    private static int? FindMatchingBracket(string str, int start)
    {
        Stack<char> stack = new Stack<char>();
        bool inString = false;
        bool inEscape = false;
        for (int i = start; i < str.Length; i++)
        {
            char c = str[i];
            if (inEscape)
            {
                // The character after a backslash is consumed verbatim.
                inEscape = false;
            }
            else if (inString)
            {
                // Inside a quoted string only the escape marker and the closing quote matter.
                if (c == '\\')
                    inEscape = true;
                else if (c == '"')
                    inString = false;
            }
            else
            {
                switch (c)
                {
                    case '{':
                    case '[':
                        stack.Push(c);
                        break;
                    case '}':
                        if (stack.Count == 0 || stack.Peek() != '{')
                            return null;
                        stack.Pop();
                        break;
                    case ']':
                        if (stack.Count == 0 || stack.Peek() != '[')
                            return null;
                        stack.Pop();
                        break;
                    case '"':
                        inString = true;
                        break;
                }
            }
            if (stack.Count == 0)
                return i;
        }
        return null; // brackets were never fully matched
    }

    /// <summary>
    /// Checks whether <paramref name="candidate"/> parses as JSON.
    /// </summary>
    /// <param name="candidate">Text to validate.</param>
    /// <returns>True when the text parses; false for null, empty or unparsable text.</returns>
    public static bool IsValidJson(string candidate)
    {
        if (string.IsNullOrEmpty(candidate))
            return false;
        try
        {
            // JsonDocument rents pooled buffers; dispose it so they are returned to the pool.
            using (JsonDocument.Parse(candidate))
            {
                return true;
            }
        }
        catch (JsonException)
        {
            return false;
        }
        catch (ArgumentException)
        {
            // Defensive: keep the no-throw contract if the parser rejects the text
            // before parsing starts (e.g. malformed UTF-16).
            return false;
        }
    }
}
}