using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Http.Json;
using System.Net.Sockets;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using AlibabaCloud.OpenApiClient.Models;
using AlibabaCloud.SDK.Vod20170321;
using AlibabaCloud.SDK.Vod20170321.Models;
using AlibabaCloud.TeaUtil.Models;
using Azure;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Options;
using VideoAnalysisCore.Job;

namespace VideoAnalysisCore.Common.Expand
{
    /// <summary>
    /// Top-level object the OCR response body is deserialized into.
    /// Member names are kept lower-case because callers and the JSON binding rely on them.
    /// </summary>
    public class SimpleTexOcrResponseData
    {
        public bool status { get; set; }

        public SimpleTexOcrResponseDataRes res { get; set; }

        public string request_id { get; set; }
    }

    /// <summary>
    /// The <c>res</c> part of the OCR response.
    /// </summary>
    public class SimpleTexOcrResponseDataRes
    {
        public string type { get; set; }

        /// <summary>
        /// Raw <c>info</c> field. It is received as a <see cref="JsonElement"/> because its JSON type
        /// is not fixed: <see cref="SimpLetexClient.ProcessImageAsync"/> handles both an object and a string.
        /// </summary>
        [JsonPropertyName("info")]
        public JsonElement DataInfo { get; set; }

        /// <summary>
        /// Recognized text. Filled by <see cref="SimpLetexClient.ProcessImageAsync"/> from <see cref="DataInfo"/>:
        /// the <c>markdown</c> member when <c>info</c> is an object, or the string itself when <c>info</c> is a string.
        /// </summary>
        public string value { get; set; }
    }

    /// <summary>
    /// Shape of the <c>info</c> field when it is a JSON object.
    /// </summary>
    public class SimpleTexOcrResponseDataInfo
    {
        public string markdown { get; set; }
    }

    /// <summary>
    /// Request parameters.
    /// </summary>
    public class SimpleTexOcrRequest
    {
        /// <summary>
        /// Opens <paramref name="filePath"/> for reading. The stream is closed by
        /// <see cref="SimpLetexClient.ProcessImageAsync"/> once the request has been sent.
        /// </summary>
        /// <param name="filePath">Path of the image to upload.</param>
        public SimpleTexOcrRequest(string filePath)
        {
            file = File.OpenRead(filePath);
        }

        /// <summary>
        /// A valid binary image file (png/jpg, etc.). Batch calls are not available;
        /// only one image can be uploaded per request.
        /// </summary>
        public FileStream file { get; set; }

        /// <summary>
        /// Selects the kind of image to recognize: <c>auto</c> detects it automatically,
        /// <c>document</c> returns a markdown document, <c>formula</c> returns LaTeX.
        /// One of "auto", "document", "formula".
        /// </summary>
        public string rec_mode { get; set; } = "document";

        /// <summary>
        /// When enabled, the model automatically corrects the orientation of the uploaded image
        /// based on 0°, 90°, 180° and 270°. Disabled by default.
        /// </summary>
        public bool enable_img_rot { get; set; } = false;

        /// <summary>
        /// Overrides the symbols that wrap inline formulas in the markdown output. Supplied as JSON;
        /// if the format is wrong the default wrapper symbols are used.
        /// Example: ["$","$"]
        /// </summary>
        public string inline_formula_wrapper { get; set; }

        /// <summary>
        /// Overrides the symbols that wrap stand-alone (display) formulas in the markdown output.
        /// Supplied as JSON; if the format is wrong the default wrapper symbols are used.
        /// Example: ["$$","$$"]
        /// </summary>
        public string isolated_formula_wrapper { get; set; }
    }

    /// <summary>
    /// OCR response.
    /// </summary>
    public class SimpleTexOcrResponse
    {
        /// <summary>True when the HTTP call returned a success status code and the body could be parsed.</summary>
        public bool Success { get; set; }

        /// <summary>Parsed response body, or null when the call failed before or during parsing.</summary>
        public SimpleTexOcrResponseData Result { get; set; }

        /// <summary>Raw response body as text, when one was received.</summary>
        public string ResultStr { get; set; }

        /// <summary>Failure description, or null on success.</summary>
        public string Error { get; set; }
    }

    /// <summary>
    /// Client that uploads an image to the configured SimpleTex OCR endpoint and returns the parsed result.
    /// </summary>
    public class SimpLetexClient
    {
        // Same defaults ReadFromJsonAsync applies; cached because building options per call is wasteful.
        private static readonly JsonSerializerOptions s_jsonOptions =
            new JsonSerializerOptions(JsonSerializerDefaults.Web);

        private readonly IHttpClientFactory _httpClientFactory;

        public SimpLetexClient(IHttpClientFactory httpClientFactory)
        {
            _httpClientFactory = httpClientFactory;
        }

        /// <summary>
        /// Sends <paramref name="request"/> as a signed multipart form POST and parses the JSON answer.
        /// </summary>
        /// <param name="request">Image and recognition options. Its file stream is closed when this method returns.</param>
        /// <returns>
        /// A response object. Transport and parsing failures are reported through
        /// <see cref="SimpleTexOcrResponse.Success"/> and <see cref="SimpleTexOcrResponse.Error"/> rather than thrown.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="request"/> carries no file stream.</exception>
        public async Task<SimpleTexOcrResponse> ProcessImageAsync(SimpleTexOcrRequest request)
        {
            if (request is null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            if (request.file is null)
            {
                throw new ArgumentException("The request does not contain an image stream.", nameof(request));
            }

            var client = _httpClientFactory.CreateClient();
            using var content = new MultipartFormDataContent();
            var parameters = new Dictionary<string, string>();

            // Adds a text form field and records it for the signature in one step,
            // so the signed value can never drift from the value actually sent.
            void AddField(string name, string value)
            {
                content.Add(new StringContent(value), name);
                parameters[name] = value;
            }

            // File part. The multipart container owns its parts, so disposing `content`
            // when this method exits also disposes the StreamContent and closes request.file.
            content.Add(
                new StreamContent(request.file),
                nameof(request.file),
                Path.GetFileName(request.file.Name));

            // "auto" is expressed by omitting the field; a null mode is treated the same way.
            if (request.rec_mode != null && request.rec_mode != "auto")
            {
                AddField(nameof(request.rec_mode), request.rec_mode);
            }

            AddField(nameof(request.enable_img_rot), request.enable_img_rot.ToString().ToLowerInvariant());

            if (request.inline_formula_wrapper != null)
            {
                AddField(nameof(request.inline_formula_wrapper), request.inline_formula_wrapper);
            }

            if (request.isolated_formula_wrapper != null)
            {
                // The property already holds JSON text (e.g. ["$$","$$"]). Serializing it again would
                // wrap it in quotes and escape it, so it is sent as-is, like the inline wrapper.
                AddField(nameof(request.isolated_formula_wrapper), request.isolated_formula_wrapper);
            }

            // Authentication parameters: 16 hex characters of randomness plus a Unix timestamp in seconds.
            var randomStr = Guid.NewGuid().ToString("N").Substring(16);
            var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();
            var appId = AppCommon.Config.SimpLetex.AppId;
            parameters["timestamp"] = timestamp;
            parameters["random-str"] = randomStr;
            parameters["app-id"] = appId;

            var sign = ComputeMD5(GenerateSignatureString(parameters, AppCommon.Config.SimpLetex.AppSecret));

            using var requestMessage = new HttpRequestMessage(
                HttpMethod.Post,
                AppCommon.Config.SimpLetex.Host + "simpletex_ocr")
            {
                Content = content
            };
            requestMessage.Headers.Add("random-str", randomStr);
            requestMessage.Headers.Add("timestamp", timestamp);
            requestMessage.Headers.Add("app-id", appId);
            requestMessage.Headers.Add("sign", sign);

            try
            {
                using var response = await client.SendAsync(requestMessage);
                var resStr = await response.Content.ReadAsStringAsync();
                var httpError = response.IsSuccessStatusCode
                    ? null
                    : $"HTTP Error: {response.StatusCode}";

                SimpleTexOcrResponseData responseContent;
                try
                {
                    responseContent = JsonSerializer.Deserialize<SimpleTexOcrResponseData>(resStr, s_jsonOptions);
                }
                catch (JsonException ex)
                {
                    // Error pages are often not JSON; keep the raw body and the HTTP status for the caller.
                    return new SimpleTexOcrResponse
                    {
                        Success = false,
                        ResultStr = resStr,
                        Error = httpError ?? $"Request Failed: {ex.Message}"
                    };
                }

                // `res` may be absent (for example in an error answer), so guard before reading `info`.
                var res = responseContent?.res;
                if (res != null)
                {
                    switch (res.DataInfo.ValueKind)
                    {
                        case JsonValueKind.Object:
                            res.value = JsonSerializer.Deserialize<SimpleTexOcrResponseDataInfo>(
                                res.DataInfo.GetRawText(),
                                s_jsonOptions)?.markdown ?? string.Empty;
                            break;

                        case JsonValueKind.String:
                            res.value = res.DataInfo.GetString();
                            break;
                    }
                }

                return new SimpleTexOcrResponse
                {
                    Success = response.IsSuccessStatusCode,
                    Result = responseContent,
                    ResultStr = resStr,
                    Error = httpError
                };
            }
            catch (Exception ex)
            {
                return new SimpleTexOcrResponse
                {
                    Success = false,
                    Error = $"Request Failed: {ex.Message}"
                };
            }
        }

        /// <summary>
        /// Builds the string that is hashed into the <c>sign</c> header: all parameters as
        /// <c>key=value</c> pairs sorted by key and joined with <c>&amp;</c>, followed by <c>&amp;secret=...</c>.
        /// </summary>
        /// <param name="parameters">Form fields and authentication parameters to sign.</param>
        /// <param name="secret">Application secret appended to the end of the string.</param>
        private static string GenerateSignatureString(IDictionary<string, string> parameters, string secret)
        {
            // Ordinal ordering keeps the result independent of the machine's culture.
            // NOTE(review): values are URL-escaped here; confirm against the service documentation
            // that the server signs escaped rather than raw values (only matters for values with
            // reserved characters, such as the formula wrappers).
            var sortedParams = parameters
                .OrderBy(p => p.Key, StringComparer.Ordinal)
                .Select(p => $"{p.Key}={Uri.EscapeDataString(p.Value)}");

            return string.Join("&", sortedParams) + $"&secret={secret}";
        }

        /// <summary>
        /// Returns the MD5 hash of the UTF-8 bytes of <paramref name="input"/> as lower-case hex.
        /// </summary>
        private static string ComputeMD5(string input)
        {
            using var md5 = MD5.Create();
            var inputBytes = Encoding.UTF8.GetBytes(input);
            var hashBytes = md5.ComputeHash(inputBytes);
            return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
        }
    }

    /// <summary>
    /// Service registration extensions.
    /// </summary>
    public static class SSimpLetexExtensions
    {
        /// <summary>
        /// Registers <see cref="SimpLetexClient"/> as a singleton. The client's constructor needs an
        /// <see cref="IHttpClientFactory"/>, which must be registered separately.
        /// </summary>
        public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
        {
            services.AddSingleton<SimpLetexClient>();
            return services;
        }
    }
}