新增 simpletex公式分析 API

This commit is contained in:
小肥羊 2025-05-20 14:52:01 +08:00
parent 7ca52f3c97
commit 0f0a24aeb3
7 changed files with 219 additions and 8 deletions

View File

@ -63,6 +63,7 @@ namespace Learn.VideoAnalysis
builder.Configuration.AddAppConfig(args);
builder.Services.AddSqlSugarExpand();
builder.Services.AddSimpleTexOcrClient();
builder.Services.AddDownloadFileExpand();
builder.Services.AddAlibabaCloudVod();
builder.Services.AddRedisExpand();

View File

@ -48,6 +48,11 @@
"aliyun": {
"Host": "https://dashscope.aliyuncs.com/compatible-mode/",
"ApiKey": "sk-1742c2bf7b9846ae835de598dc6c427b"
},
"SimpLetex": {
"Host": "https://api.deepseek.com/chat/completions",
"AppSecret": "05ZbPfCFZgTmfd4uIqHHc9pHgYR2V8bk",
"AppId": "GH2OXwuxSZEH5W28H61bdSzD"
}
},
"DB": {

View File

@ -76,10 +76,6 @@ namespace VideoAnalysisCore.AICore.GPT.DeepSeek
var captionsArr = JsonSerializer.Deserialize<SenseVoiceRes[]>(taskInfo.Captions);
var fileNameResFormat = "{授课章节: string|null}";
//var fileNamePostMessages = title +
// " 这是一堂课的标题,请你基于标题帮我分析出这堂课所讲授的内容与最恰当的授课章节(关联最贴切的章节,保留一个章节!)." +
// $"章节范围限定在[{string.Join(',', xkwKnows)}]范围内." +
// $"输出格式 json字符串 对象格式{fileNameResFormat}";
var rCaptionArr = string.Join(',', captionsArr
.Where((s, i) => i % 3 == 0)
.Take((int)(captionsArr?.Length ?? 0 / 2.2))

View File

@ -59,8 +59,29 @@ namespace VideoAnalysisCore.Common
/// </summary>
public TaskSettingConfig TaskSetting { get; set; } = new TaskSettingConfig();
/// <summary>
/// SimpLetex配置
/// </summary>
public SimpLetexConfig SimpLetex { get; set; } = new SimpLetexConfig();
}
/// <summary>
/// Settings for the SimpLetex integration: service host, application id and application secret.
/// Every value defaults to an empty string.
/// </summary>
public class SimpLetexConfig
{
    /// <summary>
    /// Public address of the service that requests are sent to.
    /// </summary>
    public string Host { get; set; } = "";

    /// <summary>
    /// Secret key of the API.
    /// </summary>
    public string AppSecret { get; set; } = "";

    /// <summary>
    /// Application id.
    /// </summary>
    public string AppId { get; set; } = "";
}
public class TaskSettingConfig
{
/// <summary>

View File

@ -0,0 +1,187 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
using AlibabaCloud.OpenApiClient.Models;
using AlibabaCloud.SDK.Vod20170321;
using AlibabaCloud.SDK.Vod20170321.Models;
using AlibabaCloud.TeaUtil.Models;
using Azure;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Request parameters for a single OCR call.
/// </summary>
public class SimpleTexOcrRequest
{
    /// <summary>
    /// The image to recognize, as a valid binary image file (png/jpg, etc.).
    /// Batch calls are not available; only one image can be uploaded per request.
    /// </summary>
    public FileStream file { get; set; }

    /// <summary>
    /// Kind of image to recognize. With auto the type is detected automatically,
    /// document returns a Markdown document and formula returns LaTeX.
    /// <para>"auto", "document", "formula"</para>
    /// </summary>
    public string rec_mode { get; set; } = "auto";

    /// <summary>
    /// When enabled, the model automatically corrects the orientation of the uploaded image
    /// based on 0°, 90°, 180° and 270°. Disabled by default.
    /// </summary>
    public bool enable_img_rot { get; set; }

    /// <summary>
    /// Overrides the symbols that wrap inline formulas in Markdown. Supplied as JSON;
    /// if the format is wrong the default wrapper symbols are used.
    /// <para>Example: ["$","$"]</para>
    /// </summary>
    public string inline_formula_wrapper { get; set; }

    /// <summary>
    /// Overrides the symbols that wrap standalone (display) formulas in Markdown. Supplied as JSON;
    /// if the format is wrong the default wrapper symbols are used.
    /// <para>Example: ["$$","$$"]</para>
    /// </summary>
    public string isolated_formula_wrapper { get; set; }
}
/// <summary>
/// Outcome of an OCR call.
/// </summary>
public class SimpleTexOcrResponse
{
    /// <summary>
    /// Whether the call is considered successful.
    /// </summary>
    public bool Success { get; set; }

    /// <summary>
    /// Result payload of the call.
    /// </summary>
    public string Result { get; set; }

    /// <summary>
    /// Error description, when there is one.
    /// </summary>
    public string Error { get; set; }
}
/// <summary>
/// Client for the SimpleTex OCR HTTP endpoint. Uploads one image as multipart form data and
/// signs the request with the application id and secret read from <c>AppCommon.Config.SimpLetex</c>.
/// </summary>
public class SimpLetexClient
{
    /// <summary>Absolute URL of the OCR endpoint.</summary>
    private const string ApiUrl = "https://server.simpletex.cn/api/simpletex_ocr";

    /// <summary>Length of the random nonce sent in the <c>random-str</c> header.</summary>
    private const int RandomStringLength = 16;

    private readonly IHttpClientFactory _httpClientFactory;

    /// <summary>
    /// Creates the client.
    /// </summary>
    /// <param name="httpClientFactory">Factory used to obtain an <see cref="HttpClient"/> per call.</param>
    public SimpLetexClient(
        IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory;
    }

    /// <summary>
    /// Sends the image and options in <paramref name="request"/> to the OCR endpoint.
    /// </summary>
    /// <param name="request">
    /// Image stream plus recognition options. The stream in <c>request.file</c> is handed to the
    /// multipart content and is disposed together with it when this method returns.
    /// </param>
    /// <returns>
    /// A response whose <c>Success</c> mirrors the HTTP status, whose <c>Result</c> is the raw response
    /// body, and whose <c>Error</c> is set on a non-success status or when sending the request fails.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="request"/> or its <c>file</c> is <c>null</c>.
    /// </exception>
    public async Task<SimpleTexOcrResponse> ProcessImageAsync(SimpleTexOcrRequest request)
    {
        if (request is null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        if (request.file is null)
        {
            throw new ArgumentNullException(nameof(request), "request.file must be an open image stream.");
        }

        var client = _httpClientFactory.CreateClient();
        using var content = new MultipartFormDataContent();

        // Only the non-file form fields (plus the auth headers below) take part in the signature.
        var parameters = new Dictionary<string, string>(StringComparer.Ordinal);

        // Adds a text field to the multipart body and records it for signing.
        void AddField(string name, string value)
        {
            content.Add(new StringContent(value), name);
            parameters[name] = value;
        }

        // File part: a filename is supplied so the part is sent as a file upload rather than a plain field.
        var fileName = System.IO.Path.GetFileName(request.file.Name);
        if (string.IsNullOrWhiteSpace(fileName))
        {
            fileName = "image";
        }

        var fileContent = new StreamContent(request.file);
        fileContent.Headers.ContentType = new MediaTypeHeaderValue(GetImageMediaType(fileName));
        content.Add(fileContent, nameof(request.file), fileName);

        // "auto" is the default mode, so it is only sent when the caller picked something else.
        if (!string.IsNullOrEmpty(request.rec_mode) && request.rec_mode != "auto")
        {
            AddField(nameof(request.rec_mode), request.rec_mode);
        }

        AddField(nameof(request.enable_img_rot), request.enable_img_rot ? "true" : "false");

        if (request.inline_formula_wrapper != null)
        {
            AddField(nameof(request.inline_formula_wrapper), request.inline_formula_wrapper);
        }

        // The wrapper is already a JSON string (e.g. ["$$","$$"]); it is sent as-is, exactly like the
        // inline wrapper. Serializing it again would wrap it in quotes and escape its content.
        if (request.isolated_formula_wrapper != null)
        {
            AddField(nameof(request.isolated_formula_wrapper), request.isolated_formula_wrapper);
        }

        // Authentication values.
        var simpleTexConfig = AppCommon.Config.SimpLetex;
        var appId = simpleTexConfig.AppId;
        // Guid "N" format is 32 hex characters without dashes; the first 16 serve as the nonce.
        var randomStr = Guid.NewGuid().ToString("N").Substring(0, RandomStringLength);
        var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds()
            .ToString(System.Globalization.CultureInfo.InvariantCulture);

        parameters["random-str"] = randomStr;
        parameters["timestamp"] = timestamp;
        parameters["app-id"] = appId;

        var sign = ComputeMD5(GenerateSignatureString(parameters, simpleTexConfig.AppSecret));

        // The factory-created client has no BaseAddress, so the absolute endpoint URL is required here.
        using var requestMessage = new HttpRequestMessage(HttpMethod.Post, ApiUrl)
        {
            Content = content
        };
        requestMessage.Headers.Add("random-str", randomStr);
        requestMessage.Headers.Add("timestamp", timestamp);
        requestMessage.Headers.Add("app-id", appId);
        requestMessage.Headers.Add("sign", sign);

        try
        {
            using var response = await client.SendAsync(requestMessage);
            var responseContent = await response.Content.ReadAsStringAsync();
            return new SimpleTexOcrResponse
            {
                Success = response.IsSuccessStatusCode,
                Result = responseContent,
                Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}"
            };
        }
        catch (Exception ex)
        {
            // Transport failures are reported through the response object instead of being thrown.
            return new SimpleTexOcrResponse
            {
                Success = false,
                Error = $"Request Failed: {ex.Message}"
            };
        }
    }

    /// <summary>
    /// Picks the media type for the uploaded image from its file extension; anything unrecognized
    /// falls back to <c>image/jpeg</c>.
    /// </summary>
    private static string GetImageMediaType(string fileName)
    {
        switch (System.IO.Path.GetExtension(fileName).ToLowerInvariant())
        {
            case ".png":
                return "image/png";
            case ".gif":
                return "image/gif";
            case ".bmp":
                return "image/bmp";
            case ".webp":
                return "image/webp";
            default:
                return "image/jpeg";
        }
    }

    /// <summary>
    /// Builds the string that is hashed into the <c>sign</c> header: all parameters as
    /// <c>key=value</c>, sorted by key with ordinal comparison, joined by <c>&amp;</c>, followed by
    /// <c>&amp;secret=...</c>.
    /// </summary>
    /// <param name="parameters">Form fields and auth values to sign.</param>
    /// <param name="secret">Application secret appended at the end.</param>
    private static string GenerateSignatureString(IDictionary<string, string> parameters, string secret)
    {
        // NOTE(review): values are concatenated raw. The vendor's signing sample appears to join
        // un-escaped values, and URI-escaping here would change the hash for wrapper values such as
        // ["$","$"] - confirm against the SimpleTex API documentation.
        var sortedParams = parameters
            .OrderBy(p => p.Key, StringComparer.Ordinal)
            .Select(p => $"{p.Key}={p.Value}");
        return string.Join("&", sortedParams) + $"&secret={secret}";
    }

    /// <summary>
    /// Returns the lowercase hexadecimal MD5 digest of the UTF-8 bytes of <paramref name="input"/>.
    /// </summary>
    private static string ComputeMD5(string input)
    {
        using var md5 = MD5.Create();
        var inputBytes = Encoding.UTF8.GetBytes(input);
        var hashBytes = md5.ComputeHash(inputBytes);
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }
}
/// <summary>
/// Service-registration extensions for the SimpleTex OCR client.
/// </summary>
public static class SSimpLetexExtensions
{
    /// <summary>
    /// Registers <see cref="SimpLetexClient"/> as a singleton together with the
    /// <c>IHttpClientFactory</c> it depends on.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, so calls can be chained.</returns>
    public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
    {
        // SimpLetexClient takes an IHttpClientFactory in its constructor; registering the factory here
        // keeps this extension self-contained. AddHttpClient is safe to call more than once.
        services.AddHttpClient();
        services.AddSingleton<SimpLetexClient>();
        return services;
    }
}
}

View File

@ -157,13 +157,14 @@ namespace VideoAnalysisCore.Controllers
/// <param name="tagId">自定义id</param>
/// <returns></returns>
[HttpGet(Name = "TaskKnowInfo")]
public async Task<IActionResult> TaskKnowInfo(long taskId, string? tagId)
public async Task<IActionResult> TaskKnowInfo(string? tagId)
{
if (taskId == 0 && string.IsNullOrEmpty(tagId))
if ( string.IsNullOrEmpty(tagId))
return BadRequest();
var taskId = int.Parse(tagId);
var task = await videoTaskDB.AsQueryable()
.WhereIF(taskId != 0, s => s.Id == taskId)
.WhereIF(!string.IsNullOrEmpty(tagId), s => s.TagId == tagId)
.WhereIF(taskId != 0, s => s.Id == taskId || s.TagId == tagId || s.PPTVideoCode== tagId)
.FirstAsync();
if (task is null)
return BadRequest("ÎÞЧÈÎÎñ");