Learn.VideoAnalysis/VideoAnalysisCore/Common/JsonExtractor.cs

181 lines
5.7 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using static System.Runtime.InteropServices.JavaScript.JSType;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Helpers for locating JSON objects/arrays embedded in free-form text and for
/// repairing backslash escapes that would otherwise make that text unparseable.
/// </summary>
public static class JsonExtractor
{
    /// <summary>
    /// Repairs backslash escapes that are not legal in JSON so the text can be parsed.
    /// Especially useful for non-standard JSON that embeds LaTeX formulas
    /// (for example <c>\overrightarrow</c> or <c>\unit</c>).
    /// </summary>
    /// <remarks>
    /// Every run of consecutive backslashes is handled as a unit. An even-length run is
    /// already a sequence of escaped backslashes and is copied unchanged. An odd-length run
    /// ends in a backslash that escapes the following character; when that character does not
    /// form a legal JSON escape (or the run ends the input) one extra backslash is appended so
    /// the run becomes even. The whole input is processed, not only the parts inside string
    /// literals. Sequences that happen to start with a legal escape letter (such as the
    /// <c>\f</c> in <c>\frac</c>) are left untouched.
    /// </remarks>
    /// <param name="json">The text to sanitize. May be null or empty.</param>
    /// <returns>The sanitized text; <paramref name="json"/> itself when it is null or empty.</returns>
    public static string ToSafeJsonString(this string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return json;
        }

        int length = json.Length;

        // Reserve roughly 10% head-room so added backslashes rarely force a re-allocation.
        var builder = new StringBuilder(length + (length / 10));

        int position = 0;
        while (position < length)
        {
            char current = json[position];
            if (current != '\\')
            {
                builder.Append(current);
                position++;
                continue;
            }

            // Consume the whole run of backslashes starting here.
            int runStart = position;
            while (position < length && json[position] == '\\')
            {
                position++;
            }

            int runLength = position - runStart;

            // Only an odd run leaves a dangling backslash whose target must be checked.
            bool hasDanglingEscape = (runLength % 2) != 0;
            if (hasDanglingEscape && (position >= length || !IsValidJsonEscape(json, position)))
            {
                // Illegal escape: double the last backslash so it becomes a literal backslash.
                runLength++;
            }

            builder.Append('\\', runLength);
        }

        return builder.ToString();
    }

    /// <summary>
    /// Tells whether the character at <paramref name="nextCharIndex"/> (the one right after a
    /// backslash) completes a legal JSON escape sequence.
    /// </summary>
    /// <param name="text">The text being scanned.</param>
    /// <param name="nextCharIndex">Index of the character following the backslash; must be inside <paramref name="text"/>.</param>
    /// <returns>
    /// True for the simple escapes <c>" \ / b f n r t</c>, and for <c>u</c> followed by
    /// exactly four hexadecimal digits; otherwise false.
    /// </returns>
    private static bool IsValidJsonEscape(string text, int nextCharIndex)
    {
        switch (text[nextCharIndex])
        {
            // Standard single-character JSON escapes.
            case '"':
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
                return true;

            // Unicode escape: \uXXXX needs four hex digits after the 'u'.
            case 'u':
                if (nextCharIndex + 4 >= text.Length)
                {
                    return false;
                }

                for (int offset = 1; offset <= 4; offset++)
                {
                    if (!IsHexDigit(text[nextCharIndex + offset]))
                    {
                        return false;
                    }
                }

                return true;

            default:
                return false;
        }
    }

    /// <summary>Returns true when <paramref name="c"/> is an ASCII hexadecimal digit.</summary>
    private static bool IsHexDigit(char c)
    {
        return (c >= '0' && c <= '9')
            || (c >= 'a' && c <= 'f')
            || (c >= 'A' && c <= 'F');
    }

    /// <summary>
    /// Extracts every top-level JSON object or array embedded in <paramref name="input"/>.
    /// </summary>
    /// <remarks>
    /// The input is first passed through <see cref="ToSafeJsonString"/>, so the returned
    /// substrings are taken from the sanitized text. Candidates are found by balancing
    /// braces/brackets outside of double-quoted strings and are kept only when
    /// <see cref="IsValidJson"/> accepts them. Closing braces/brackets that appear while no
    /// candidate is open are ignored, and a candidate whose closers outnumber its openers is
    /// abandoned, so stray punctuation in the surrounding text does not prevent later JSON
    /// from being found.
    /// </remarks>
    /// <param name="input">Free-form text that may contain JSON. May be null.</param>
    /// <returns>The valid JSON substrings in order of appearance; empty when none are found.</returns>
    public static List<string> ExtractJsonStrings(this string input)
    {
        var results = new List<string>();
        if (string.IsNullOrWhiteSpace(input))
        {
            return results;
        }

        input = input.ToSafeJsonString();

        int braceCount = 0;
        int bracketCount = 0;
        int startIndex = -1;   // -1 means "no candidate currently open"
        bool inString = false;
        bool isEscaped = false;

        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];

            // 1. The character after a backslash is part of an escape (e.g. \") and is skipped.
            if (isEscaped)
            {
                isEscaped = false;
                continue;
            }

            if (c == '\\')
            {
                isEscaped = true;
                continue;
            }

            // 2. Track string boundaries.
            if (c == '"')
            {
                inString = !inString;
                continue;
            }

            // 3. Braces/brackets inside a string literal are plain text.
            if (inString)
            {
                continue;
            }

            // 4. Opening of an object or array.
            if (c == '{' || c == '[')
            {
                if (startIndex == -1)
                {
                    startIndex = i;
                }

                if (c == '{')
                {
                    braceCount++;
                }
                else
                {
                    bracketCount++;
                }
            }
            // 5. Closing of an object or array.
            else if (c == '}' || c == ']')
            {
                // A closer with no open candidate is stray text. Counting it would push the
                // depth below zero and hide every JSON value that follows.
                if (startIndex == -1)
                {
                    continue;
                }

                if (c == '}')
                {
                    braceCount--;
                }
                else
                {
                    bracketCount--;
                }

                // More closers than openers of one kind: this candidate can never balance,
                // so drop it and start looking afresh.
                if (braceCount < 0 || bracketCount < 0)
                {
                    braceCount = 0;
                    bracketCount = 0;
                    startIndex = -1;
                    continue;
                }

                if (braceCount == 0 && bracketCount == 0)
                {
                    string potentialJson = input.Substring(startIndex, i - startIndex + 1);
                    if (IsValidJson(potentialJson))
                    {
                        results.Add(potentialJson);
                    }

                    startIndex = -1;
                }
            }
        }

        return results;
    }

    /// <summary>
    /// Checks whether <paramref name="candidate"/> parses as a single JSON value.
    /// </summary>
    /// <param name="candidate">The text to test. May be null.</param>
    /// <returns>True when the text parses successfully; false for null, empty, or unparseable text.</returns>
    public static bool IsValidJson(string candidate)
    {
        if (string.IsNullOrEmpty(candidate))
        {
            return false;
        }

        try
        {
            // JsonDocument is IDisposable; dispose it right away since only the
            // success/failure of parsing matters here.
            using (JsonDocument.Parse(candidate))
            {
                return true;
            }
        }
        catch (JsonException)
        {
            return false;
        }
        catch (ArgumentException)
        {
            // Also treated as "not valid JSON" so this method keeps reporting failure
            // through its return value.
            return false;
        }
    }
}
}