using System;
using System.Collections.Generic;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using static System.Runtime.InteropServices.JavaScript.JSType;
namespace VideoAnalysisCore.Common
{
/// <summary>
/// Helpers for repairing and extracting JSON fragments embedded in free-form text.
/// </summary>
public static class JsonExtractor
{
    /// <summary>
    /// Repairs non-standard backslash escapes so that the text conforms to the JSON grammar.
    /// Intended for non-standard JSON that embeds LaTeX commands (e.g. \overrightarrow, \unit).
    /// </summary>
    /// <remarks>
    /// A run of backslashes with odd length whose final backslash is not followed by a valid
    /// JSON escape receives one extra backslash. Commands that begin with a letter that is a
    /// valid JSON escape (for example \text or \frac) are left untouched, because they cannot
    /// be told apart from genuine \t / \f escapes here.
    /// </remarks>
    /// <param name="json">The text to repair; may be null or empty.</param>
    /// <returns>The repaired text, or <paramref name="json"/> itself when it is null or empty.</returns>
    public static string ToSafeJsonString(this string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return json;
        }

        // Reserve roughly 10% headroom so that added backslashes rarely force a reallocation.
        var sb = new StringBuilder(json.Length + (json.Length / 10));
        int i = 0;
        int len = json.Length;
        while (i < len)
        {
            char c = json[i];
            if (c != '\\')
            {
                sb.Append(c);
                i++;
                continue;
            }

            // Consume the whole run of consecutive backslashes.
            int start = i;
            while (i < len && json[i] == '\\')
            {
                i++;
            }

            int count = i - start;

            // Pairs of backslashes escape each other; only an odd run leaves a dangling
            // backslash whose following character has to be a legal JSON escape.
            if (count % 2 != 0 && (i >= len || !IsValidJsonEscape(json, i)))
            {
                // Illegal (or missing) escape target: double the dangling backslash.
                count++;
            }

            sb.Append('\\', count);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Tells whether the character at <paramref name="nextCharIndex"/> forms a valid JSON
    /// escape sequence when preceded by a single backslash.
    /// </summary>
    /// <param name="text">The text being inspected.</param>
    /// <param name="nextCharIndex">Index of the character right after the backslash; must be inside <paramref name="text"/>.</param>
    /// <returns>true for ", \, /, b, f, n, r, t, or u followed by four hex digits; otherwise false.</returns>
    private static bool IsValidJsonEscape(string text, int nextCharIndex)
    {
        char nextChar = text[nextCharIndex];

        // Simple single-character JSON escapes.
        if (nextChar == '"' || nextChar == '\\' || nextChar == '/' ||
            nextChar == 'b' || nextChar == 'f' || nextChar == 'n' ||
            nextChar == 'r' || nextChar == 't')
        {
            return true;
        }

        if (nextChar != 'u')
        {
            return false;
        }

        // Unicode escape \uXXXX: all four following characters must exist and be hex digits.
        if (nextCharIndex + 4 >= text.Length)
        {
            return false;
        }

        for (int k = 1; k <= 4; k++)
        {
            char hex = text[nextCharIndex + k];
            bool isHex = (hex >= '0' && hex <= '9') ||
                         (hex >= 'a' && hex <= 'f') ||
                         (hex >= 'A' && hex <= 'F');
            if (!isHex)
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Extracts every top-level JSON object or array embedded in <paramref name="input"/>.
    /// </summary>
    /// <remarks>
    /// The input is first passed through <see cref="ToSafeJsonString"/>, so the returned
    /// fragments contain the repaired escapes. Each balanced candidate is kept only if
    /// <see cref="IsValidJson"/> accepts it.
    /// </remarks>
    /// <param name="input">Free-form text that may contain JSON fragments; may be null.</param>
    /// <returns>The valid JSON fragments in order of appearance; empty when none are found.</returns>
    public static List<string> ExtractJsonStrings(this string input)
    {
        input = input.ToSafeJsonString();
        var results = new List<string>();
        if (string.IsNullOrWhiteSpace(input))
        {
            return results;
        }

        int braceCount = 0;
        int bracketCount = 0;
        int startIndex = -1;
        bool inString = false;
        bool isEscaped = false;

        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];

            // 1. The character after a backslash is consumed verbatim (e.g. \").
            if (isEscaped)
            {
                isEscaped = false;
                continue;
            }

            if (c == '\\')
            {
                isEscaped = true;
                continue;
            }

            // 2. Track string boundaries.
            if (c == '"')
            {
                inString = !inString;
                continue;
            }

            // 3. Brackets inside a string literal carry no structural meaning.
            if (inString)
            {
                continue;
            }

            // 4. Start of an object or array.
            if (c == '{' || c == '[')
            {
                if (braceCount == 0 && bracketCount == 0)
                {
                    startIndex = i;
                }

                if (c == '{')
                {
                    braceCount++;
                }
                else
                {
                    bracketCount++;
                }
            }
            // 5. End of an object or array.
            else if (c == '}' || c == ']')
            {
                // A closer with no open candidate is plain text; counting it would drive
                // the counters negative and hide every later fragment.
                if (startIndex == -1)
                {
                    continue;
                }

                if (c == '}')
                {
                    braceCount--;
                }
                else
                {
                    bracketCount--;
                }

                // More closers than openers of one kind: the candidate is malformed.
                // Drop it and start over so that later fragments can still be found.
                if (braceCount < 0 || bracketCount < 0)
                {
                    braceCount = 0;
                    bracketCount = 0;
                    startIndex = -1;
                    continue;
                }

                if (braceCount == 0 && bracketCount == 0)
                {
                    string potentialJson = input.Substring(startIndex, i - startIndex + 1);
                    if (IsValidJson(potentialJson))
                    {
                        results.Add(potentialJson);
                    }

                    startIndex = -1;
                }
            }
        }

        return results;
    }

    /// <summary>
    /// Checks whether <paramref name="candidate"/> parses as a JSON document.
    /// </summary>
    /// <param name="candidate">The text to validate; may be null.</param>
    /// <returns>true when the text parses successfully; false for null, empty or malformed text.</returns>
    public static bool IsValidJson(string candidate)
    {
        if (string.IsNullOrEmpty(candidate))
        {
            return false;
        }

        try
        {
            // JsonDocument rents pooled buffers; dispose it so they are returned.
            using (JsonDocument document = JsonDocument.Parse(candidate))
            {
                return true;
            }
        }
        catch (JsonException)
        {
            return false;
        }
    }
}
}