231 lines
8.6 KiB
C#
231 lines
8.6 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.Linq;
|
||
using System.Net.Http;
|
||
using System.Net.Http.Headers;
|
||
using System.Net.Http.Json;
|
||
using System.Net.Sockets;
|
||
using System.Security.Cryptography;
|
||
using System.Text;
|
||
using System.Text.Json.Serialization;
|
||
using System.Text.Json;
|
||
using System.Threading.Tasks;
|
||
using AlibabaCloud.OpenApiClient.Models;
|
||
using AlibabaCloud.SDK.Vod20170321;
|
||
using AlibabaCloud.SDK.Vod20170321.Models;
|
||
using AlibabaCloud.TeaUtil.Models;
|
||
using Azure;
|
||
using Microsoft.Extensions.DependencyInjection;
|
||
using Microsoft.Extensions.Options;
|
||
using VideoAnalysisCore.Job;
|
||
|
||
namespace VideoAnalysisCore.Common.Expand
|
||
{
|
||
|
||
|
||
/// <summary>
/// Top-level JSON payload of a SimpleTex OCR reply, as deserialized by
/// <c>SimpLetexClient.ProcessImageAsync</c>.
/// </summary>
public class SimpleTexOcrResponseData
{
    /// <summary>Value of the <c>status</c> field in the reply body.</summary>
    public bool status { get; set; }

    /// <summary>Value of the <c>res</c> field: the object carrying the recognition output.</summary>
    public SimpleTexOcrResponseDataRes res { get; set; }

    /// <summary>Value of the <c>request_id</c> field in the reply body.</summary>
    public string request_id { get; set; }
}
|
||
|
||
/// <summary>
/// The <c>res</c> object of a SimpleTex OCR reply.
/// </summary>
public class SimpleTexOcrResponseDataRes
{
    /// <summary>Value of the <c>type</c> field.</summary>
    public string type { get; set; }

    /// <summary>
    /// Raw JSON bound from the <c>info</c> field. It is kept as a <see cref="JsonElement"/>
    /// because its JSON kind is not known up front (the client handles both object and string).
    /// </summary>
    [JsonPropertyName("info")]
    public JsonElement DataInfo { get; set; }

    /// <summary>
    /// Value of the <c>value</c> field. <c>SimpLetexClient.ProcessImageAsync</c> overwrites it
    /// with the text extracted from <see cref="DataInfo"/> when that is an object or a string.
    /// </summary>
    public string value { get; set; }
}
|
||
|
||
/// <summary>
/// Shape used to read the <c>info</c> object of a SimpleTex OCR reply when it is a JSON object.
/// </summary>
public class SimpleTexOcrResponseDataInfo
{
    /// <summary>Value of the <c>markdown</c> field.</summary>
    public string markdown { get; set; }
}
|
||
|
||
|
||
/// <summary>
/// Request parameters for a single SimpleTex OCR call.
/// </summary>
/// <remarks>
/// The constructor opens the image file, so every instance owns an open <see cref="FileStream"/>.
/// Call <see cref="Dispose"/> if the request ends up not being sent, so the file handle is released.
/// </remarks>
public class SimpleTexOcrRequest : IDisposable
{
    /// <summary>
    /// Opens <paramref name="filePath"/> for reading and stores the stream in <see cref="file"/>.
    /// </summary>
    /// <param name="filePath">Path of the image file to upload.</param>
    public SimpleTexOcrRequest(string filePath)
    {
        file = File.OpenRead(filePath);
    }

    /// <summary>
    /// A valid image binary (png/jpg, etc.). Batch calls are not supported;
    /// only one image can be uploaded per request.
    /// </summary>
    public FileStream file { get; set; }

    /// <summary>
    /// Specifies the type of image to recognize: <c>auto</c> detects it automatically,
    /// <c>document</c> returns a markdown document result, <c>formula</c> returns a LaTeX result.
    /// <para>"auto", "document", "formula"</para>
    /// </summary>
    public string rec_mode { get; set; } = "document";

    /// <summary>
    /// When enabled, the model automatically corrects the orientation of the uploaded image
    /// based on 0°, 90°, 180°, 270°. Disabled by default.
    /// </summary>
    public bool enable_img_rot { get; set; } = false;

    /// <summary>
    /// Overrides the wrapper symbols for inline formulas in markdown. Supplied as JSON;
    /// if the format is wrong the default wrapper symbols are used.
    /// <para>Example: ["$","$"]</para>
    /// </summary>
    public string inline_formula_wrapper { get; set; }

    /// <summary>
    /// Overrides the wrapper symbols for standalone-line formulas in markdown. Supplied as JSON;
    /// if the format is wrong the default wrapper symbols are used.
    /// <para>Example: ["$$","$$"]</para>
    /// </summary>
    public string isolated_formula_wrapper { get; set; }

    /// <summary>
    /// Releases the file stream held in <see cref="file"/>. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        file?.Dispose();
    }
}
|
||
/// <summary>
/// Outcome of an OCR call as returned by <c>SimpLetexClient.ProcessImageAsync</c>.
/// </summary>
public class SimpleTexOcrResponse
{
    /// <summary>Whether the call is considered successful.</summary>
    public bool Success { get; set; }

    /// <summary>The deserialized reply body, when one was obtained.</summary>
    public SimpleTexOcrResponseData Result { get; set; }

    /// <summary>The reply body as raw text, when one was obtained.</summary>
    public string ResultStr { get; set; }

    /// <summary>Error description for a failed call.</summary>
    public string Error { get; set; }
}
|
||
|
||
|
||
/// <summary>
/// Client for the SimpleTex OCR endpoint. Uploads one image as multipart form data, signs the
/// request with an MD5 signature sent in headers, and maps the JSON reply to
/// <see cref="SimpleTexOcrResponse"/>.
/// </summary>
public class SimpLetexClient
{
    // Options for reading the "info" object; hoisted so they are not re-allocated per call.
    private static readonly JsonSerializerOptions InfoJsonOptions = new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    };

    private readonly IHttpClientFactory _httpClientFactory;

    /// <summary>Creates the client.</summary>
    /// <param name="httpClientFactory">Factory used to obtain an <see cref="HttpClient"/> per call.</param>
    public SimpLetexClient(
        IHttpClientFactory httpClientFactory)
    {
        _httpClientFactory = httpClientFactory;
    }

    /// <summary>
    /// Sends <paramref name="request"/> to the <c>simpletex_ocr</c> endpoint and returns the outcome.
    /// </summary>
    /// <param name="request">
    /// The OCR request. Its <c>file</c> stream is disposed by this method once the call has
    /// completed, whether it succeeded or failed.
    /// </param>
    /// <returns>
    /// A response whose <c>Success</c> mirrors the HTTP status. Failures while sending or while
    /// reading the reply are reported through <c>Error</c> instead of being thrown.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<SimpleTexOcrResponse> ProcessImageAsync(SimpleTexOcrRequest request)
    {
        if (request == null)
        {
            throw new ArgumentNullException(nameof(request));
        }

        var client = _httpClientFactory.CreateClient();
        using var content = new MultipartFormDataContent();
        var parameters = new Dictionary<string, string>();

        // Every text field is sent as a form part AND takes part in the signature.
        void AddSignedField(string name, string value)
        {
            content.Add(new StringContent(value), name);
            parameters[name] = value;
        }

        // The file part is sent but is not part of the signed parameters.
        var fileContent = new StreamContent(request.file);
        content.Add(fileContent, nameof(request.file), Path.GetFileName(request.file.Name));

        if (request.rec_mode != "auto")
        {
            AddSignedField(nameof(request.rec_mode), request.rec_mode);
        }

        // Literal lower-case text, independent of the current culture.
        AddSignedField(nameof(request.enable_img_rot), request.enable_img_rot ? "true" : "false");

        if (request.inline_formula_wrapper != null)
        {
            AddSignedField(nameof(request.inline_formula_wrapper), request.inline_formula_wrapper);
        }

        if (request.isolated_formula_wrapper != null)
        {
            // NOTE(review): this field goes through ToJson() while inline_formula_wrapper is sent
            // as-is, although both are documented as already being JSON text — confirm the
            // asymmetry is intended.
            AddSignedField(nameof(request.isolated_formula_wrapper), request.isolated_formula_wrapper.ToJson());
        }

        // Authentication parameters: sent as headers and included in the signature.
        var randomStr = Guid.NewGuid().ToString("N").Substring(16);
        var timestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();
        var appId = AppCommon.Config.SimpLetex.AppId;

        parameters["timestamp"] = timestamp;
        parameters["random-str"] = randomStr;
        parameters["app-id"] = appId;

        var sign = ComputeMD5(GenerateSignatureString(parameters, AppCommon.Config.SimpLetex.AppSecret));

        using var requestMessage = new HttpRequestMessage(HttpMethod.Post,
            AppCommon.Config.SimpLetex.Host + "simpletex_ocr")
        {
            Content = content
        };

        requestMessage.Headers.Add("random-str", randomStr);
        requestMessage.Headers.Add("timestamp", timestamp);
        requestMessage.Headers.Add("app-id", appId);
        requestMessage.Headers.Add("sign", sign);

        try
        {
            using var response = await client.SendAsync(requestMessage);
            var resStr = await response.Content.ReadAsStringAsync();
            var responseContent = await response.Content.ReadFromJsonAsync<SimpleTexOcrResponseData>();

            // The body may lack "res" (for example on an error reply); skip extraction then so the
            // HTTP status is still reported instead of a NullReferenceException message.
            var res = responseContent?.res;
            if (res != null)
            {
                switch (res.DataInfo.ValueKind)
                {
                    case JsonValueKind.Object:
                        res.value = JsonSerializer.Deserialize<SimpleTexOcrResponseDataInfo>(
                            res.DataInfo.GetRawText(),
                            InfoJsonOptions
                        )?.markdown ?? string.Empty;
                        break;
                    case JsonValueKind.String:
                        res.value = res.DataInfo.GetString();
                        break;
                }
            }

            return new SimpleTexOcrResponse
            {
                Success = response.IsSuccessStatusCode,
                Result = responseContent,
                ResultStr = resStr,
                Error = response.IsSuccessStatusCode ? null : $"HTTP Error: {response.StatusCode}"
            };
        }
        catch (Exception ex)
        {
            return new SimpleTexOcrResponse
            {
                Success = false,
                Error = $"Request Failed: {ex.Message}"
            };
        }
        finally
        {
            // Release the caller's file handle on every path, not only on success.
            request.file.Dispose();
        }
    }

    /// <summary>
    /// Builds the string that gets hashed into the request signature: <c>key=value</c> pairs
    /// sorted by key and joined with <c>&amp;</c>, followed by <c>&amp;secret=...</c>.
    /// </summary>
    /// <param name="parameters">Form and authentication parameters to sign.</param>
    /// <param name="secret">The application secret appended at the end.</param>
    private static string GenerateSignatureString(IDictionary<string, string> parameters, string secret)
    {
        // Ordinal ordering keeps the key order identical on every machine; the default string
        // comparer is culture-sensitive.
        // NOTE(review): values are percent-encoded before hashing — confirm this matches the
        // service's signing rules for values containing reserved characters.
        var sortedParams = parameters
            .OrderBy(p => p.Key, StringComparer.Ordinal)
            .Select(p => $"{p.Key}={Uri.EscapeDataString(p.Value)}");

        return string.Join("&", sortedParams) + $"&secret={secret}";
    }

    /// <summary>
    /// Returns the MD5 hash of the UTF-8 bytes of <paramref name="input"/> as lower-case hex
    /// without separators.
    /// </summary>
    private static string ComputeMD5(string input)
    {
        using var md5 = MD5.Create();
        var inputBytes = Encoding.UTF8.GetBytes(input);
        var hashBytes = md5.ComputeHash(inputBytes);
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }
}
|
||
|
||
/// <summary>
/// Service-registration extensions for the SimpleTex OCR client.
/// </summary>
public static class SSimpLetexExtensions
{
    /// <summary>
    /// Registers <see cref="SimpLetexClient"/> as a singleton, together with the
    /// <c>IHttpClientFactory</c> services its constructor requires.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="services"/> is null.</exception>
    public static IServiceCollection AddSimpleTexOcrClient(this IServiceCollection services)
    {
        if (services == null)
        {
            throw new ArgumentNullException(nameof(services));
        }

        // SimpLetexClient takes an IHttpClientFactory; make sure one is resolvable even when the
        // host has not called AddHttpClient() itself. Calling it again is harmless.
        services.AddHttpClient();
        services.AddSingleton<SimpLetexClient>();
        return services;
    }
}
|
||
}
|