Learn.VideoAnalysis/VideoAnalysisCore/Common/Expand/SimpLetexExpand.cs

211 lines
7.6 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Http.Json;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
using AlibabaCloud.OpenApiClient.Models;
using AlibabaCloud.SDK.Vod20170321;
using AlibabaCloud.SDK.Vod20170321.Models;
using AlibabaCloud.TeaUtil.Models;
using Azure;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Top-level shape of the JSON body that the OCR call is deserialized into.
/// Property names are kept lowercase/snake_case to mirror the wire format.
/// </summary>
public class SimpleTexOcrResponseData
{
    /// <summary>Status flag reported by the service (presumably true on success — confirm against the API docs).</summary>
    public bool status { get; set; }

    /// <summary>Recognition result payload.</summary>
    public SimpleTexOcrResponseDataRes res { get; set; }

    /// <summary>Request identifier returned by the service.</summary>
    public string request_id { get; set; }
}
/// <summary>
/// The "res" node of the OCR response: a result type tag plus the result details.
/// </summary>
public class SimpleTexOcrResponseDataRes
{
    /// <summary>Result type tag as returned by the service (exact value set not visible here — confirm against the API docs).</summary>
    public string type { get; set; }

    /// <summary>Details of the recognition result.</summary>
    public SimpleTexOcrResponseDataInfo info { get; set; }
}
/// <summary>
/// The "info" node of the OCR response, carrying the recognized content.
/// </summary>
public class SimpleTexOcrResponseDataInfo
{
    /// <summary>Recognized content as a markdown string.</summary>
    public string markdown { get; set; }
}
/// <summary>
/// Request parameters for the OCR call.
/// <para>
/// The constructor opens the image file immediately, so the instance owns an open
/// <see cref="FileStream"/>. Dispose the request if it ends up not being sent so the
/// file handle is released.
/// </para>
/// </summary>
public class SimpleTexOcrRequest : IDisposable
{
    /// <summary>
    /// Opens <paramref name="filePath"/> for reading and stores the stream in <see cref="file"/>.
    /// </summary>
    /// <param name="filePath">Path of the image file to upload.</param>
    public SimpleTexOcrRequest(string filePath)
    {
        file = File.OpenRead(filePath);
    }

    /// <summary>
    /// A valid binary image file (png/jpg and similar formats). Batch calls are not
    /// supported; only one image can be uploaded per request.
    /// </summary>
    public FileStream file { get; set; }

    /// <summary>
    /// Specifies the kind of image to recognize. "auto" detects the type automatically,
    /// "document" returns a markdown document result, "formula" returns a LaTeX result.
    /// <para>"auto", "document", "formula"</para>
    /// </summary>
    public string rec_mode { get; set; } = "auto";

    /// <summary>
    /// When enabled, the model automatically corrects the orientation of the uploaded
    /// image based on 0°, 90°, 180° and 270°. Disabled by default.
    /// </summary>
    public bool enable_img_rot { get; set; } = false;

    /// <summary>
    /// Overrides the wrapper symbols used for inline formulas in the markdown output.
    /// Supplied as JSON; if the format is invalid the default wrappers are used.
    /// <para>Example: ["$","$"]</para>
    /// </summary>
    public string inline_formula_wrapper { get; set; }

    /// <summary>
    /// Overrides the wrapper symbols used for standalone (display) formulas in the markdown
    /// output. Supplied as JSON; if the format is invalid the default wrappers are used.
    /// <para>Example: ["$$","$$"]</para>
    /// </summary>
    public string isolated_formula_wrapper { get; set; }

    /// <summary>
    /// Closes the underlying file stream. Safe to call more than once, and safe to call
    /// after the stream has already been closed elsewhere.
    /// </summary>
    public void Dispose()
    {
        // file has a public setter, so it may have been replaced with null.
        file?.Dispose();
    }
}
/// <summary>
/// Outcome of an OCR call as reported to callers: a success flag, the parsed
/// response body (when one was read), and an error description otherwise.
/// </summary>
public class SimpleTexOcrResponse
{
    /// <summary>True when the HTTP call completed with a success status code.</summary>
    public bool Success { get; set; }

    /// <summary>Deserialized response body; not set when the request itself failed.</summary>
    public SimpleTexOcrResponseData Result { get; set; }

    /// <summary>Error description; null when the call succeeded.</summary>
    public string Error { get; set; }
}
/// <summary>
/// Client for the "simpletex_ocr" HTTP endpoint. Uploads one image as multipart form data,
/// signs the request with the configured app id and secret, and wraps the outcome in a
/// <see cref="SimpleTexOcrResponse"/>.
/// </summary>
public class SimpLetexClient
{
    private readonly IHttpClientFactory _httpClientFactory;

    /// <summary>Creates the client.</summary>
    /// <param name="httpClientFactory">Factory used to obtain an <see cref="HttpClient"/> per call.</param>
    public SimpLetexClient(
        IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory));
    }

    /// <summary>
    /// Sends <paramref name="request"/> to the OCR endpoint.
    /// </summary>
    /// <param name="request">
    /// The image and options to send. The request's file stream is closed when this method
    /// returns (it is owned by the multipart content), so a request instance cannot be sent twice.
    /// </param>
    /// <returns>
    /// A response whose <c>Success</c> mirrors the HTTP status. Transport and parse failures
    /// are reported through <c>Error</c> instead of being thrown.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<SimpleTexOcrResponse> ProcessImageAsync(SimpleTexOcrRequest request)
    {
        if (request == null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        var client = _httpClientFactory.CreateClient();
        using var content = new MultipartFormDataContent();

        // Every non-file form field is also collected here because it takes part in the signature.
        var parameters = new Dictionary<string, string>();

        void AddField(string name, string value)
        {
            content.Add(new StringContent(value), name);
            parameters[name] = value;
        }

        // File part.
        var fileContent = new StreamContent(request.file);
        content.Add(fileContent, nameof(request.file), Path.GetFileName(request.file.Name));

        // "auto" is the default, so the field is only sent when the caller picked something else.
        if (request.rec_mode != null && request.rec_mode != "auto")
        {
            AddField(nameof(request.rec_mode), request.rec_mode);
        }

        AddField(nameof(request.enable_img_rot), request.enable_img_rot ? "true" : "false");

        if (request.inline_formula_wrapper != null)
        {
            AddField(nameof(request.inline_formula_wrapper), request.inline_formula_wrapper);
        }

        if (request.isolated_formula_wrapper != null)
        {
            // The value is already JSON text (e.g. ["$$","$$"]); send it verbatim, exactly like
            // the inline wrapper. Serializing it again would turn the array into a JSON string.
            AddField(nameof(request.isolated_formula_wrapper), request.isolated_formula_wrapper);
        }

        // Authentication values: sent as headers and included in the signature.
        var randomStr = Guid.NewGuid().ToString("N").Substring(16);
        var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();
        var appId = AppCommon.Config.SimpLetex.AppId;
        parameters["timestamp"] = timestamp;
        parameters["random-str"] = randomStr;
        parameters["app-id"] = appId;

        var sign = ComputeMD5(GenerateSignatureString(parameters, AppCommon.Config.SimpLetex.AppSecret));

        using var requestMessage = new HttpRequestMessage(HttpMethod.Post,
            AppCommon.Config.SimpLetex.Host + "simpletex_ocr")
        {
            Content = content
        };
        requestMessage.Headers.Add("random-str", randomStr);
        requestMessage.Headers.Add("timestamp", timestamp);
        requestMessage.Headers.Add("app-id", appId);
        requestMessage.Headers.Add("sign", sign);

        try
        {
            using var response = await client.SendAsync(requestMessage);

            SimpleTexOcrResponseData responseContent = null;
            try
            {
                responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>();
            }
            catch (Exception parseError) when (!response.IsSuccessStatusCode
                && (parseError is System.Text.Json.JsonException || parseError is NotSupportedException))
            {
                // An error response whose body is not JSON: report the HTTP status below
                // instead of a misleading parse failure.
            }

            return new SimpleTexOcrResponse
            {
                Success = response.IsSuccessStatusCode,
                Result = responseContent,
                Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}"
            };
        }
        catch (Exception ex)
        {
            return new SimpleTexOcrResponse
            {
                Success = false,
                Error = $"Request Failed: {ex.Message}"
            };
        }
    }

    /// <summary>
    /// Builds the string that gets hashed into the "sign" header: all parameters as
    /// <c>key=value</c> pairs sorted by key and joined with '&amp;', followed by <c>&amp;secret=...</c>.
    /// </summary>
    private static string GenerateSignatureString(IDictionary<string, string> parameters, string secret)
    {
        // Ordinal ordering keeps the result independent of the current culture.
        // NOTE(review): values are percent-encoded here as in the original code; confirm
        // against the service's signing rules whether raw values are expected instead.
        var sortedParams = parameters
            .OrderBy(p => p.Key, StringComparer.Ordinal)
            .Select(p => $"{p.Key}={Uri.EscapeDataString(p.Value)}");
        return string.Join("&", sortedParams) + $"&secret={secret}";
    }

    /// <summary>Returns the lowercase hexadecimal MD5 digest of the UTF-8 bytes of <paramref name="input"/>.</summary>
    private static string ComputeMD5(string input)
    {
        using var md5 = MD5.Create();
        var hashBytes = md5.ComputeHash(Encoding.UTF8.GetBytes(input));
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }
}
/// <summary>
/// Service registration extensions.
/// </summary>
public static class SSimpLetexExtensions
{
    /// <summary>
    /// Registers <see cref="SimpLetexClient"/> as a singleton, together with the
    /// <see cref="IHttpClientFactory"/> services its constructor requires.
    /// </summary>
    /// <param name="services">The service collection to add to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
    {
        if (services == null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        // SimpLetexClient takes an IHttpClientFactory; make sure it is resolvable even when
        // the host has not registered it. Repeated AddHttpClient() calls are harmless.
        services.AddHttpClient();
        services.AddSingleton<SimpLetexClient>();
        return services;
    }
}
}