using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Http.Json;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
using AlibabaCloud.OpenApiClient.Models;
using AlibabaCloud.SDK.Vod20170321;
using AlibabaCloud.SDK.Vod20170321.Models;
using AlibabaCloud.TeaUtil.Models;
using Azure;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using VideoAnalysisCore.Job;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Top-level payload of a SimpleTex OCR reply.
/// Member names are lowercase/snake_case, presumably to mirror the JSON keys
/// of the service (confirm against the API documentation).
/// </summary>
public class SimpleTexOcrResponseData
{
    /// <summary>Status flag carried in the payload (presumably true on success; confirm).</summary>
    public bool status { get; set; }

    /// <summary>Recognition result section of the payload.</summary>
    public SimpleTexOcrResponseDataRes res { get; set; }

    /// <summary>Request identifier carried in the payload.</summary>
    public string request_id { get; set; }
}
/// <summary>
/// The "res" section of a SimpleTex OCR payload.
/// </summary>
public class SimpleTexOcrResponseDataRes
{
    /// <summary>Result type reported by the service (presumably the recognition mode used; confirm).</summary>
    public string type { get; set; }

    /// <summary>Container for the recognized content.</summary>
    public SimpleTexOcrResponseDataInfo info { get; set; }
}
/// <summary>
/// The "info" section of a SimpleTex OCR payload.
/// </summary>
public class SimpleTexOcrResponseDataInfo
{
    /// <summary>Recognized content as Markdown text.</summary>
    public string markdown { get; set; }
}
/// <summary>
/// Request parameters for a SimpleTex OCR call.
/// </summary>
/// <remarks>
/// The constructor opens the image file, so every instance owns an open
/// <see cref="FileStream"/>. Call <see cref="Dispose"/> (or use a <c>using</c>
/// statement) to close it; disposing more than once is safe.
/// </remarks>
public class SimpleTexOcrRequest : IDisposable
{
    /// <summary>
    /// Opens <paramref name="filePath"/> for reading and stores the stream in <see cref="file"/>.
    /// </summary>
    /// <param name="filePath">Path of the image to upload.</param>
    public SimpleTexOcrRequest(string filePath)
    {
        file = File.OpenRead(filePath);
    }

    /// <summary>
    /// A valid binary image file (png/jpg, etc.). Batch calls are not supported;
    /// only one image can be uploaded per request.
    /// </summary>
    public FileStream file { get; set; }

    /// <summary>
    /// Specifies the type of image to recognize: <c>auto</c> detects the type automatically,
    /// <c>document</c> returns a Markdown document result, <c>formula</c> returns a LaTeX result.
    /// Allowed values: "auto", "document", "formula".
    /// </summary>
    public string rec_mode { get; set; } = "document";

    /// <summary>
    /// When enabled, the model automatically corrects the orientation of the uploaded image
    /// based on 0°, 90°, 180° and 270°. Disabled by default.
    /// </summary>
    public bool enable_img_rot { get; set; } = false;

    /// <summary>
    /// Overrides the wrapper symbols used for inline formulas in the Markdown output.
    /// Supplied as JSON text; if the format is invalid the default wrappers are used.
    /// Example: <c>["$","$"]</c>
    /// </summary>
    public string inline_formula_wrapper { get; set; }

    /// <summary>
    /// Overrides the wrapper symbols used for standalone (display) formulas in the Markdown output.
    /// Supplied as JSON text; if the format is invalid the default wrappers are used.
    /// Example: <c>["$$","$$"]</c>
    /// </summary>
    public string isolated_formula_wrapper { get; set; }

    /// <summary>
    /// Closes the image stream opened by the constructor (or assigned to <see cref="file"/>).
    /// </summary>
    public void Dispose()
    {
        file?.Dispose();
        GC.SuppressFinalize(this);
    }
}
/// <summary>
/// Outcome of an OCR call.
/// </summary>
public class SimpleTexOcrResponse
{
    /// <summary>Whether the call is considered successful.</summary>
    public bool Success { get; set; }

    /// <summary>Payload returned by the service, when one was obtained.</summary>
    public SimpleTexOcrResponseData Result { get; set; }

    /// <summary>Error description for a failed call.</summary>
    public string Error { get; set; }
}
/// <summary>
/// Client for the SimpleTex OCR endpoint: uploads one image as multipart form data,
/// signs the request with the configured app id and secret, and returns the parsed reply.
/// </summary>
public class SimpLetexClient
{
    private readonly IHttpClientFactory _httpClientFactory;

    /// <summary>Creates the client.</summary>
    /// <param name="httpClientFactory">Factory used to obtain an <see cref="HttpClient"/> for each call.</param>
    public SimpLetexClient(
        IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory;
    }

    /// <summary>
    /// Sends <paramref name="request"/> to the <c>simpletex_ocr</c> endpoint.
    /// </summary>
    /// <param name="request">Image and recognition options. Its file stream is closed when this method returns.</param>
    /// <returns>
    /// A response whose <c>Success</c> mirrors the HTTP status. Transport and parsing failures are
    /// reported through <c>Error</c> ("Request Failed: ...") instead of being thrown; non-success
    /// HTTP statuses are reported as "HTTP Error: ...".
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<SimpleTexOcrResponse> ProcessImageAsync(SimpleTexOcrRequest request)
    {
        if (request is null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        var client = _httpClientFactory.CreateClient();

        // Disposing the multipart content also disposes its parts, and StreamContent disposes
        // the stream it wraps, so request.file is closed on every exit path of this method.
        using var content = new MultipartFormDataContent();

        // Form fields that take part in the signature (the file itself does not).
        var parameters = new Dictionary<string, string>();

        // Adds a text field to the form and records it for signing.
        void AddField(string name, string value)
        {
            content.Add(new StringContent(value), name);
            parameters[name] = value;
        }

        // File part.
        var fileContent = new StreamContent(request.file);
        content.Add(fileContent, nameof(request.file), Path.GetFileName(request.file.Name));

        // Optional fields: "auto" (or no value) means the field is simply omitted.
        if (request.rec_mode != null && request.rec_mode != "auto")
        {
            AddField(nameof(request.rec_mode), request.rec_mode);
        }

        AddField(nameof(request.enable_img_rot), request.enable_img_rot ? "true" : "false");

        if (request.inline_formula_wrapper != null)
        {
            AddField(nameof(request.inline_formula_wrapper), request.inline_formula_wrapper);
        }

        if (request.isolated_formula_wrapper != null)
        {
            // The property already holds JSON text (e.g. ["$$","$$"]); it is sent as-is, exactly
            // like inline_formula_wrapper. Serializing it again would wrap it in quotes and
            // escape it, turning the JSON array into a JSON string.
            AddField(nameof(request.isolated_formula_wrapper), request.isolated_formula_wrapper);
        }

        // Authentication parameters.
        var randomStr = Guid.NewGuid().ToString("N").Substring(16); // last 16 hex chars of a GUID
        var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();
        var appId = AppCommon.Config.SimpLetex.AppId;
        parameters["timestamp"] = timestamp;
        parameters["random-str"] = randomStr;
        parameters["app-id"] = appId;

        // Signature.
        var signStr = GenerateSignatureString(parameters, AppCommon.Config.SimpLetex.AppSecret);
        var sign = ComputeMD5(signStr);

        // Build the request and attach the auth headers.
        using var requestMessage = new HttpRequestMessage(HttpMethod.Post,
            AppCommon.Config.SimpLetex.Host + "simpletex_ocr")
        {
            Content = content
        };
        requestMessage.Headers.Add("random-str", randomStr);
        requestMessage.Headers.Add("timestamp", timestamp);
        requestMessage.Headers.Add("app-id", appId);
        requestMessage.Headers.Add("sign", sign);

        try
        {
            using var response = await client.SendAsync(requestMessage);

            if (!response.IsSuccessStatusCode)
            {
                // Error bodies are not guaranteed to be JSON; keep the HTTP status as the error
                // and attach the parsed body only when it can be read.
                return new SimpleTexOcrResponse
                {
                    Success = false,
                    Result = await TryReadResultAsync(response.Content),
                    Error = $"HTTP Error: {response.StatusCode}"
                };
            }

            var responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>();
            return new SimpleTexOcrResponse
            {
                Success = true,
                Result = responseContent,
                Error = null
            };
        }
        catch (Exception ex)
        {
            return new SimpleTexOcrResponse
            {
                Success = false,
                Error = $"Request Failed: {ex.Message}"
            };
        }
    }

    /// <summary>
    /// Best-effort parse of a response body; returns null when the body is not readable as JSON.
    /// </summary>
    private static async Task<SimpleTexOcrResponseData> TryReadResultAsync(HttpContent content)
    {
        try
        {
            return await content.ReadFromJsonAsync<SimpleTexOcrResponseData>();
        }
        catch (Exception ex) when (ex is System.Text.Json.JsonException || ex is NotSupportedException)
        {
            return null;
        }
    }

    /// <summary>
    /// Builds the string to sign: <c>key=value</c> pairs sorted by key and joined with
    /// <c>&amp;</c>, followed by <c>&amp;secret=...</c>.
    /// </summary>
    /// <param name="parameters">Form fields and auth parameters taking part in the signature.</param>
    /// <param name="secret">Application secret appended at the end.</param>
    private static string GenerateSignatureString(IDictionary<string, string> parameters, string secret)
    {
        // Ordinal ordering keeps the result independent of the machine's culture.
        // NOTE(review): values are URL-escaped here, as in the original code. Confirm against the
        // SimpleTex signing rules whether the server signs escaped or raw values; this only
        // matters for values containing reserved characters (e.g. the formula wrappers).
        var sortedParams = parameters
            .OrderBy(p => p.Key, StringComparer.Ordinal)
            .Select(p => $"{p.Key}={Uri.EscapeDataString(p.Value)}");
        return string.Join("&", sortedParams) + $"&secret={secret}";
    }

    /// <summary>Returns the MD5 hash of the UTF-8 bytes of <paramref name="input"/> as lowercase hex.</summary>
    private static string ComputeMD5(string input)
    {
        using var md5 = MD5.Create();
        var inputBytes = Encoding.UTF8.GetBytes(input);
        var hashBytes = md5.ComputeHash(inputBytes);
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }
}
/// <summary>
/// Service registration extensions.
/// </summary>
public static class SSimpLetexExtensions
{
    /// <summary>
    /// Registers <see cref="SimpLetexClient"/> as a singleton, together with the
    /// <c>IHttpClientFactory</c> services it depends on.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
    {
        if (services is null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        // SimpLetexClient takes an IHttpClientFactory in its constructor; AddHttpClient() is safe
        // to call even if the application has already registered the factory.
        services.AddHttpClient();
        services.AddSingleton<SimpLetexClient>();
        return services;
    }
}
}