优化 复习课切题提示词
This commit is contained in:
parent
dd7e217bb2
commit
02518a1c4f
|
|
@ -154,7 +154,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
var messageBuilder1 = new StringBuilder();
|
||||
var lastChat = new ChatResSSE();
|
||||
var splitCount = "data:".Length;
|
||||
var maxLoop = 60*1000;
|
||||
var maxLoop = 60*10000;
|
||||
int threshold = 0;
|
||||
while (maxLoop>0)
|
||||
{
|
||||
|
|
@ -205,6 +205,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
}
|
||||
}
|
||||
}
|
||||
Console.WriteLine(DateTime.Now + "=>AI请求超时 " + chatReq.taskId);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
/// 一种替代温度采样的方法,称为原子核采样, 其中,模型考虑具有top_p概率的标记的结果 质量。所以 0.1 表示仅包含前 10% 概率质量的代币 被考
|
||||
/// <para>建议与<see cref="ChatRequest.temperature"/>联动</para>
|
||||
/// </summary>
|
||||
public float top_p { get; set; } = 0.5f;
|
||||
public float top_p { get; set; } = 0.1f;
|
||||
/// <summary>
|
||||
/// 一个对象,用于指定模型必须输出的格式。设置为 enable 结构化输出,确保模型与您提供的 JSON 匹配 图式。
|
||||
/// </summary>
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ using static System.Collections.Specialized.BitVector32;
|
|||
using FFmpeg.NET.Services;
|
||||
using Aliyun.OSS;
|
||||
using Yitter.IdGenerator;
|
||||
using VideoAnalysisCore.Common.Expand;
|
||||
|
||||
namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
||||
{
|
||||
|
|
@ -172,10 +173,14 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
{
|
||||
//校验结果质量
|
||||
var thems = JsonSerializer.Serialize(questionRes.Adapt<VideoKnowQueryDto[]>());
|
||||
var pptFormat = taskInfo.VideoType==AttachmentsInfoType.PPT
|
||||
? "这堂课是习题课,所讲解内容都是试题。"
|
||||
: string.Empty;
|
||||
var checkResFormat = """{"Score":打分(number),"Evaluation":评价(string)""";//,"Data":优化后的分段(array)}""";
|
||||
var checkMessage = "我为视频的讲解内容做了一些分段,希望你能通读字幕内容后检查下的分段是否符合我的要求?" +
|
||||
$"检查这些分段的时间是否合理 与相邻的时间段间隔是否处于合理区间30~900秒之间?" +
|
||||
$"检查这些分段的时间是否合理 与相邻的时间段间隔是否大于30秒?" +
|
||||
$"分段的主题内容,知识点分配是否合理符合实际吗?" +
|
||||
$"{pptFormat}" +
|
||||
$"请给出你的打分(0-100,70分及格)以及打分原因。" +
|
||||
$"这是我的分段 {thems}。" +
|
||||
$"后续的内容是包含时间戳的视频字幕的固定格式文本。" +
|
||||
|
|
@ -257,33 +262,39 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
TotalCaptionsDto captions, string sections)
|
||||
{
|
||||
var tryCount = 10;
|
||||
while (tryCount-->10)
|
||||
while (tryCount-->0)
|
||||
{
|
||||
try
|
||||
{
|
||||
var keyFrameArr = string.IsNullOrEmpty(taskInfo?.PPTVideoCode)
|
||||
? string.Empty
|
||||
: $"视频授课内容发生了变化的时间节点{taskInfo.PPTKeyFrame},授课阶段应当在附近时间发生变化。";
|
||||
: $"图像视频中授课内容PPT发生了变化的时间节点是{taskInfo.PPTKeyFrame},授课阶段结果可以参考这些时间节点。";
|
||||
var resFormat = """[{"StartTime":开始秒(number),"EndTime":结束秒(number),"Stage":阶段(string),"Theme":主题(string),"Content":内容总结(string)}]""";
|
||||
|
||||
var exerciseClass = taskInfo?.VideoType == AttachmentsInfoType.Review
|
||||
? $"本堂课是习题课,每个阶段因当是不同得例题讲解片段,并且所有的授课阶段都视为例题精讲。"
|
||||
: $"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。" +
|
||||
$"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" +
|
||||
$"通过生成的内容总结分析出对应的授课阶段主题。 ";
|
||||
? $"但是本堂课是习题课,所以每个阶段是不同的例题讲解内容。"
|
||||
: string.Empty;
|
||||
//$"请注意 本次分析的视频字幕只是其中一部分 不需要分析出所有类型的授课阶段。";
|
||||
var postMessages =
|
||||
$"请通过视频字幕内容分析出视频中{taskInfo.Subject}课堂的授课阶段。" +
|
||||
$"课堂内容与{sections}章节相关。" +
|
||||
$"请通过视频字幕内容分析出视频中课堂的授课阶段。" +
|
||||
$"课堂内容与{taskInfo.Subject}学科下的{sections}章节相关。" +
|
||||
$"{keyFrameArr}" +
|
||||
$"完整的课堂标准流程包含以下5个阶段:课程引入/新知讲解/例题精讲/课堂练习/知识总结。"+
|
||||
$"{exerciseClass}" +
|
||||
$"最后请检查每个授课阶段的时长,不允许出现超出800秒或者低于50秒的授课阶段。" +
|
||||
$"通过授课阶段的主要讲解内容分析出对应的授课阶段内容总结。" +
|
||||
$"通过生成的内容总结分析出对应的授课阶段主题。 "+
|
||||
$"最后请检查每个授课阶段的时长,不允许出现低于50秒的阶段。" +
|
||||
$"输出内容只返回json格式({resFormat})" +
|
||||
$"字幕格式(开始秒:内容|下一段字幕).以下是包含时间的视频字幕文本。" +
|
||||
$"字幕列表 {captions.Captions} 字幕结束!";
|
||||
|
||||
Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id.ToString()}.开始分析视频内容 {tryCount}");
|
||||
questionRes.AddRange(await ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕"));
|
||||
var resData = await ChatAsync<VideoKnowRes[]>(taskInfo.Id.ToString(), postMessages, "分析字幕");
|
||||
if (taskInfo?.VideoType == AttachmentsInfoType.Review)
|
||||
foreach (var item in resData)
|
||||
item.Stage = StageEnum.例题精讲.ToString();
|
||||
questionRes.AddRange(resData);
|
||||
break;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
|
|
@ -305,6 +316,7 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
{
|
||||
taskId = task,
|
||||
model = model,
|
||||
max_tokens= model== "deepseek-reasoner"?16000:8000,
|
||||
stream = true,
|
||||
temperature = 0.2f,
|
||||
messages = messageArr
|
||||
|
|
@ -325,13 +337,13 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
redisCached[1] = new object[] { chatResp.Value.res, chatResp.Value.u, chatResp.Value.reasoning };
|
||||
RedisExpand.SetTaskGPTCached(task, time, redisCached);
|
||||
}
|
||||
chatResContent = chatResContent?.Replace("字幕内容", "课堂情况");
|
||||
chatResContent = chatResContent?.ExtractJsonStrings()?.FirstOrDefault();
|
||||
chatResContent = chatResContent?.Replace("\n", "");
|
||||
chatResContent = chatResContent?.Replace("```json", "");
|
||||
chatResContent = chatResContent?.Replace("```", "");
|
||||
chatResContent = chatResContent?.Replace("}{", "},{");
|
||||
chatResContent = chatResContent?.Replace("}|{", "},{");
|
||||
chatResContent = chatResContent?.Trim().ExtractJson().FirstOrDefault();
|
||||
chatResContent = chatResContent?.Trim();
|
||||
|
||||
var startsStr = typeof(T).IsArray ? "[" : "{";
|
||||
var endStr = typeof(T).IsArray ? "]" : "}";
|
||||
|
|
@ -393,17 +405,21 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
|
|||
.ProcessImageAsync(new SimpleTexOcrRequest(filePath));
|
||||
if (!sRes.Success)
|
||||
continue;
|
||||
var knowArr=string.Join(',', knowInfoArr.Select(s => s.KnowPointId + "|" + s.KnowPoint ));
|
||||
if(sRes.Result.res.value.Trim().Length<10)//总试题内容长度小于10 视为无效题目
|
||||
break;
|
||||
Console.WriteLine(DateTime.Now + $"=>{taskInfo.Id} 提取{knowInfoArr.First().StartTime}秒试题的试题内容");
|
||||
Console.WriteLine( sRes.Result.res.value);
|
||||
var knowArr=JsonSerializer.Serialize(knowInfoArr.Select(s => new { s.KnowPointId, s.KnowPoint }));
|
||||
var resFormat = """[{"TopicStem":string(试题题干),"Question:string(问题)","KnowPointId":(string)知识点ID}]""";
|
||||
var postMessages =
|
||||
$"提供一段内容是md格式的试题内容字符串。" +
|
||||
$"请提取出其中的试题内容。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" +
|
||||
$"知识点格式(知识点ID|知识点名称)范围[{knowArr}]。" +
|
||||
$"我将提供一段内容是md格式的试题内容字符串。" +
|
||||
$"请提取出其中的试题内容以及每个试题的问题。并且为每个试题关联上在我限定范围内的知识点(多个则用逗号分割)。" +
|
||||
$"知识点格式(json格式)范围[{knowArr}]。" +
|
||||
$"排除不是试题内容的文字,优化公式排版并且去除题号。" +
|
||||
$"如果存在多道大题,请帮忙拆分开!" +
|
||||
$"如果存在多道题(或者小问),则需要拆分成为多个试题对象!" +
|
||||
$"输出内容只返回json格式为({resFormat})" +
|
||||
$"以下是试题内容" +
|
||||
$"`{sRes.Result.res.info.markdown}`";
|
||||
$"`{sRes.Result.res.value}`";
|
||||
var resData = await ChatAsync<VideoQuestionOSSDto[]>(taskInfo.Id.ToString(), postMessages, "提取试题", "deepseek-chat");
|
||||
foreach (var q in resData)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -102,50 +102,6 @@ namespace VideoAnalysisCore.Common
|
|||
return Path.Combine(task.Id.ToString().LocalPath(), $"{FrameName}{(fTime / 5).ToString().PadLeft(3,'0')}.jpg");
|
||||
}
|
||||
|
||||
/// <summary>
/// Finds bracket-balanced fragments in a string that look like JSON objects or arrays.
/// </summary>
/// <remarks>
/// Matching is a plain depth count: every '{' or '[' raises the depth and every '}' or ']'
/// lowers it, without distinguishing the two bracket kinds and without skipping brackets
/// that appear inside string literals. Fragments are not validated as JSON.
/// </remarks>
/// <param name="input">Text to scan; must not be null.</param>
/// <returns>
/// The balanced fragments in order of appearance. A fragment whose brackets never close
/// before the end of the text is dropped.
/// </returns>
public static List<string> ExtractJson(this string input)
{
    var fragments = new List<string>();
    var pos = 0;

    while (pos < input.Length)
    {
        var current = input[pos];
        if (current != '{' && current != '[')
        {
            // Not an opening bracket: keep looking.
            pos++;
            continue;
        }

        var begin = pos;
        var depth = 1;

        // Walk forward until the depth returns to zero or the text runs out.
        for (pos++; pos < input.Length && depth > 0; pos++)
        {
            switch (input[pos])
            {
                case '{':
                case '[':
                    depth++;
                    break;
                case '}':
                case ']':
                    depth--;
                    break;
            }
        }

        // pos now sits one past the closing bracket when the fragment is balanced.
        if (depth == 0)
        {
            fragments.Add(input.Substring(begin, pos - begin));
        }
    }

    return fragments;
}
|
||||
/// <summary>
|
||||
/// 处理数学公式
|
||||
/// </summary>
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ using System.Collections.Generic;
|
|||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using VideoAnalysisCore.Common.Expand;
|
||||
|
||||
namespace VideoAnalysisCore.Common
|
||||
{
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ using AlibabaCloud.TeaUtil.Models;
|
|||
using Microsoft.Extensions.DependencyInjection;
|
||||
using VideoAnalysisCore.Job;
|
||||
|
||||
namespace VideoAnalysisCore.Common
|
||||
namespace VideoAnalysisCore.Common.Expand
|
||||
{
|
||||
public class AlibabaCloudVodConfig
|
||||
public class AlibabaCloudVodConfig
|
||||
{
|
||||
/// <summary>
|
||||
/// id
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ using Aliyun.Credentials.Models;
|
|||
using VideoAnalysisCore.Model.Dto;
|
||||
using System.IO;
|
||||
|
||||
namespace VideoAnalysisCore.Common
|
||||
namespace VideoAnalysisCore.Common.Expand
|
||||
{
|
||||
public class AliyunOSSConfig
|
||||
{
|
||||
|
|
@ -65,13 +65,13 @@ namespace VideoAnalysisCore.Common
|
|||
// 必填,请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
|
||||
AccessKeySecret = AppCommon.Config.AliyunOSS.AccessKeySecret,
|
||||
Endpoint = AppCommon.Config.AliyunOSS.Endpoint,
|
||||
Region= AppCommon.Config.AliyunOSS.Region,
|
||||
Region = AppCommon.Config.AliyunOSS.Region,
|
||||
};// 创建ClientConfiguration实例,按照您的需要修改默认参数。
|
||||
var conf = new ClientConfiguration();
|
||||
// 设置v4签名。
|
||||
conf.SignatureVersion = SignatureVersion.V4;
|
||||
// 创建OssClient实例。
|
||||
var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf);
|
||||
var oss = new OssClient(config.Endpoint, config.AccessKeyId, config.AccessKeySecret, conf);
|
||||
oss.SetRegion(config.Region);
|
||||
|
||||
service.AddSingleton(oss);
|
||||
|
|
@ -81,7 +81,7 @@ namespace VideoAnalysisCore.Common
|
|||
/// </summary>
|
||||
/// <param name="oss"></param>
|
||||
/// <param name="fileArr">视频实体片段</param>
|
||||
public static void AddVideoQuestionUrl(this OssClient oss, List<VideoQuestionOSSDto> fileArr )
|
||||
public static void AddVideoQuestionUrl(this OssClient oss, List<VideoQuestionOSSDto> fileArr)
|
||||
{
|
||||
var cached = new HashSet<string>();
|
||||
foreach (var item in fileArr)
|
||||
|
|
@ -89,7 +89,7 @@ namespace VideoAnalysisCore.Common
|
|||
try
|
||||
{
|
||||
var path = item.VideoTaskId.ToString() + "/" + Path.GetFileName(item.FilePath);
|
||||
if (cached.Contains(item.FilePath))
|
||||
if (cached.Contains(item.FilePath))
|
||||
{
|
||||
item.ImageUrl = AppCommon.Config.AliyunOSS.BucketDomain + "/" + path;
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ using System.Net.Http.Json;
|
|||
using System.Net.Sockets;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
using AlibabaCloud.OpenApiClient.Models;
|
||||
using AlibabaCloud.SDK.Vod20170321;
|
||||
|
|
@ -15,10 +17,9 @@ using AlibabaCloud.TeaUtil.Models;
|
|||
using Azure;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Newtonsoft.Json;
|
||||
using VideoAnalysisCore.Job;
|
||||
|
||||
namespace VideoAnalysisCore.Common
|
||||
namespace VideoAnalysisCore.Common.Expand
|
||||
{
|
||||
|
||||
|
||||
|
|
@ -32,7 +33,10 @@ namespace VideoAnalysisCore.Common
|
|||
public class SimpleTexOcrResponseDataRes
|
||||
{
|
||||
public string type { get; set; }
|
||||
public SimpleTexOcrResponseDataInfo info { get; set; }
|
||||
|
||||
[JsonPropertyName("info")] // 替换为实际字段名
|
||||
public JsonElement DataInfo { get; set; } // 使用JsonElement接收未知类型
|
||||
public string value { get; set; }
|
||||
}
|
||||
|
||||
public class SimpleTexOcrResponseDataInfo
|
||||
|
|
@ -81,6 +85,7 @@ namespace VideoAnalysisCore.Common
|
|||
{
|
||||
public bool Success { get; set; }
|
||||
public SimpleTexOcrResponseData Result { get; set; }
|
||||
public string ResultStr { get; set; }
|
||||
public string Error { get; set; }
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +128,7 @@ namespace VideoAnalysisCore.Common
|
|||
|
||||
if (request.isolated_formula_wrapper != null)
|
||||
{
|
||||
var isolatedWrapper = JsonConvert.SerializeObject(request.isolated_formula_wrapper);
|
||||
var isolatedWrapper = JsonSerializer.Serialize(request.isolated_formula_wrapper);
|
||||
content.Add(new StringContent(isolatedWrapper), nameof(request.isolated_formula_wrapper));
|
||||
parameters[nameof(request.isolated_formula_wrapper)] = isolatedWrapper;
|
||||
}
|
||||
|
|
@ -161,11 +166,25 @@ namespace VideoAnalysisCore.Common
|
|||
var response = await client.SendAsync(requestMessage);
|
||||
var resStr = await response.Content.ReadAsStringAsync();
|
||||
var responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>();
|
||||
if (responseContent.res.DataInfo.ValueKind == JsonValueKind.Object)
|
||||
{
|
||||
responseContent.res.value = JsonSerializer.Deserialize<SimpleTexOcrResponseDataInfo>(
|
||||
responseContent.res.DataInfo.GetRawText(),
|
||||
new JsonSerializerOptions { PropertyNameCaseInsensitive = true }
|
||||
)?.markdown??string.Empty;
|
||||
// 处理字符串
|
||||
}
|
||||
else if (responseContent.res.DataInfo.ValueKind == JsonValueKind.String)
|
||||
{
|
||||
responseContent.res.value = responseContent.res.DataInfo.GetString();
|
||||
}
|
||||
|
||||
request.file.Dispose();
|
||||
return new SimpleTexOcrResponse
|
||||
{
|
||||
Success = response.IsSuccessStatusCode,
|
||||
Result = responseContent,
|
||||
ResultStr= resStr,
|
||||
Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}"
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,105 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace VideoAnalysisCore.Common
|
||||
{
|
||||
/// <summary>
/// Helpers for pulling well-formed JSON objects and arrays out of free-form text.
/// </summary>
public static class JsonExtractor
{
    /// <summary>
    /// Scans <paramref name="input"/> from left to right and collects every
    /// <c>{...}</c> or <c>[...]</c> span that is bracket-balanced and parses as JSON.
    /// </summary>
    /// <param name="input">Text to scan. A null or empty value yields an empty list.</param>
    /// <returns>
    /// The JSON fragments found, in order of appearance. Once a fragment is accepted,
    /// scanning resumes after it, so fragments nested inside it are not returned separately.
    /// </returns>
    public static List<string> ExtractJsonStrings(this string input)
    {
        List<string> jsonList = new List<string>();

        // Nothing to scan; avoids dereferencing a null string below.
        if (string.IsNullOrEmpty(input))
            return jsonList;

        int index = 0;
        while (index < input.Length)
        {
            if (input[index] == '{' || input[index] == '[')
            {
                int? endIndex = FindMatchingBracket(input, index);
                if (endIndex.HasValue)
                {
                    string candidate = input.Substring(index, endIndex.Value - index + 1);
                    if (IsValidJson(candidate))
                    {
                        jsonList.Add(candidate);
                        // Skip past the accepted fragment.
                        index = endIndex.Value + 1;
                        continue;
                    }
                }
            }

            // No valid fragment starts here: advance one character and retry,
            // which also gives brackets nested in a rejected candidate a chance.
            index++;
        }
        return jsonList;
    }

    /// <summary>
    /// Finds the index of the bracket that closes the one at <paramref name="start"/>.
    /// </summary>
    /// <param name="str">Text being scanned.</param>
    /// <param name="start">Index of the opening '{' or '['.</param>
    /// <returns>
    /// The index of the matching closing bracket, or null when the brackets are
    /// mismatched or the text ends before they are all closed.
    /// </returns>
    private static int? FindMatchingBracket(string str, int start)
    {
        Stack<char> stack = new Stack<char>();
        bool inString = false;
        bool inEscape = false;

        for (int i = start; i < str.Length; i++)
        {
            char c = str[i];

            if (inEscape)
            {
                // The character after a backslash is consumed verbatim.
                inEscape = false;
            }
            else if (inString)
            {
                // Brackets inside a string literal are ignored.
                if (c == '\\')
                    inEscape = true;
                else if (c == '"')
                    inString = false;
            }
            else
            {
                switch (c)
                {
                    case '{':
                    case '[':
                        stack.Push(c);
                        break;
                    case '}':
                        if (stack.Count == 0 || stack.Peek() != '{')
                            return null;
                        stack.Pop();
                        break;
                    case ']':
                        if (stack.Count == 0 || stack.Peek() != '[')
                            return null;
                        stack.Pop();
                        break;
                    case '"':
                        inString = true;
                        break;
                }
            }

            // The stack empties exactly when the opening bracket at start is closed.
            if (stack.Count == 0)
                return i;
        }
        return null; // Brackets were not fully matched.
    }

    /// <summary>
    /// Checks whether <paramref name="candidate"/> parses as a JSON document.
    /// </summary>
    /// <param name="candidate">Text to validate.</param>
    /// <returns>True when the text parses as JSON; false for null, empty or malformed text.</returns>
    public static bool IsValidJson(string candidate)
    {
        if (string.IsNullOrEmpty(candidate))
            return false;
        try
        {
            // JsonDocument holds pooled buffers, so it must be disposed after parsing.
            using (JsonDocument.Parse(candidate))
            {
                return true;
            }
        }
        catch (JsonException)
        {
            return false;
        }
    }
}
|
||||
}
|
||||
Loading…
Reference in New Issue