181 lines
5.7 KiB
C#
181 lines
5.7 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.Text;
|
||
using System.Text.Json;
|
||
using System.Text.RegularExpressions;
|
||
using static System.Runtime.InteropServices.JavaScript.JSType;
|
||
|
||
namespace VideoAnalysisCore.Common
|
||
{
|
||
/// <summary>
/// Helpers for repairing backslash escapes that are illegal in JSON and for pulling
/// well-formed JSON objects/arrays out of larger free-form text.
/// </summary>
public static class JsonExtractor
{
    /// <summary>
    /// Doubles every backslash that does not begin a legal JSON escape sequence so the text
    /// conforms to the JSON grammar. Intended for non-standard JSON that embeds raw LaTeX
    /// commands such as <c>\overrightarrow</c> or <c>\unit</c>.
    /// </summary>
    /// <remarks>
    /// The entire input is scanned, not just the contents of string literals. A backslash is
    /// left untouched when it is followed by one of <c>" \ / b f n r t</c> or by <c>u</c> and
    /// four hexadecimal digits. Consequently a LaTeX command whose first letter is one of those
    /// escape letters (for example <c>\frac</c> or <c>\theta</c>) is NOT repaired: it is
    /// indistinguishable from a genuine JSON escape at this level.
    /// </remarks>
    /// <param name="json">Text to repair. May be <c>null</c> or empty.</param>
    /// <returns>
    /// The repaired text; <paramref name="json"/> itself when it is <c>null</c> or empty.
    /// </returns>
    public static string ToSafeJsonString(this string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return json;
        }

        // Reserve about 10% headroom so inserted backslashes rarely force a buffer resize.
        var output = new StringBuilder(json.Length + (json.Length / 10));
        int pos = 0;

        while (pos < json.Length)
        {
            if (json[pos] != '\\')
            {
                output.Append(json[pos]);
                pos++;
                continue;
            }

            // Consume the whole run of consecutive backslashes in one go.
            int runStart = pos;
            do
            {
                pos++;
            }
            while (pos < json.Length && json[pos] == '\\');

            int runLength = pos - runStart;

            // An even run is made of self-contained "\\" pairs. In an odd run the final
            // backslash escapes whatever follows, so that follower must be validated.
            bool lastBackslashEscapesNext = (runLength % 2) != 0;
            if (lastBackslashEscapesNext && (pos >= json.Length || !IsValidJsonEscape(json, pos)))
            {
                // Illegal (or dangling) escape: add one backslash so it becomes a literal "\\".
                runLength++;
            }

            output.Append('\\', runLength);
        }

        return output.ToString();
    }

    /// <summary>
    /// Tells whether the character at <paramref name="nextCharIndex"/> (the one directly after
    /// a backslash) completes a legal JSON escape sequence.
    /// </summary>
    /// <param name="text">The text being scanned.</param>
    /// <param name="nextCharIndex">
    /// Index of the character following the backslash. Must be a valid index into
    /// <paramref name="text"/>; the caller checks this.
    /// </param>
    /// <returns>
    /// <c>true</c> for the simple escapes <c>" \ / b f n r t</c> and for <c>u</c> followed by
    /// exactly four available hexadecimal digits; otherwise <c>false</c>.
    /// </returns>
    private static bool IsValidJsonEscape(string text, int nextCharIndex)
    {
        switch (text[nextCharIndex])
        {
            // Simple single-character escapes.
            case '"':
            case '\\':
            case '/':
            case 'b':
            case 'f':
            case 'n':
            case 'r':
            case 't':
                return true;

            // Unicode escape: \uXXXX needs four hex digits after the 'u'.
            case 'u':
                if (nextCharIndex + 4 >= text.Length)
                {
                    return false;
                }

                for (int offset = 1; offset <= 4; offset++)
                {
                    if (!Uri.IsHexDigit(text[nextCharIndex + offset]))
                    {
                        return false;
                    }
                }

                return true;

            default:
                return false;
        }
    }

    /// <summary>
    /// Extracts every top-level JSON object or array embedded in <paramref name="input"/>.
    /// </summary>
    /// <remarks>
    /// The input is first passed through <see cref="ToSafeJsonString"/>, so the returned
    /// strings are slices of the repaired text and may contain more backslashes than the
    /// original. A candidate spans from an opening <c>{</c> or <c>[</c> seen at depth zero to
    /// the closer that brings both depth counters back to zero; it is kept only if
    /// <see cref="IsValidJson"/> accepts it. Brackets inside double-quoted strings are ignored.
    /// </remarks>
    /// <param name="input">Free-form text that may contain JSON. May be <c>null</c>.</param>
    /// <returns>
    /// The valid JSON fragments in order of appearance; an empty list when
    /// <paramref name="input"/> is <c>null</c>, empty or whitespace, or contains none.
    /// </returns>
    public static List<string> ExtractJsonStrings(this string input)
    {
        var results = new List<string>();
        if (string.IsNullOrWhiteSpace(input))
        {
            return results;
        }

        input = input.ToSafeJsonString();

        int braceCount = 0;
        int bracketCount = 0;
        int startIndex = -1;
        bool inString = false;
        bool isEscaped = false;

        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];

            // 1. The character right after a backslash (e.g. the quote in \") is never structural.
            if (isEscaped)
            {
                isEscaped = false;
                continue;
            }

            if (c == '\\')
            {
                isEscaped = true;
                continue;
            }

            // 2. An unescaped quote toggles the "inside a string literal" state.
            if (c == '"')
            {
                inString = !inString;
                continue;
            }

            // 3. Brackets inside a string literal are plain text.
            if (inString)
            {
                continue;
            }

            switch (c)
            {
                // 4. Opening of an object or array; at depth zero it starts a new candidate.
                case '{':
                case '[':
                    if (braceCount == 0 && bracketCount == 0)
                    {
                        startIndex = i;
                    }

                    if (c == '{')
                    {
                        braceCount++;
                    }
                    else
                    {
                        bracketCount++;
                    }

                    break;

                // 5. Closing of an object or array.
                case '}':
                case ']':
                    if (c == '}')
                    {
                        braceCount--;
                    }
                    else
                    {
                        bracketCount--;
                    }

                    if (braceCount < 0 || bracketCount < 0)
                    {
                        // A closer without a matching opener (stray bracket in the surrounding
                        // prose, or a mismatched pair inside a candidate). Without this reset
                        // the counter would stay negative and no later JSON could ever be
                        // recognised, so drop the current candidate and start over.
                        braceCount = 0;
                        bracketCount = 0;
                        startIndex = -1;
                        break;
                    }

                    if (braceCount == 0 && bracketCount == 0 && startIndex != -1)
                    {
                        string potentialJson = input.Substring(startIndex, i - startIndex + 1);
                        if (IsValidJson(potentialJson))
                        {
                            results.Add(potentialJson);
                        }

                        startIndex = -1;
                    }

                    break;
            }
        }

        return results;
    }

    /// <summary>
    /// Checks whether <paramref name="candidate"/> parses as a JSON document.
    /// </summary>
    /// <param name="candidate">Text to validate. May be <c>null</c>.</param>
    /// <returns>
    /// <c>true</c> when <see cref="JsonDocument.Parse(string, JsonDocumentOptions)"/> accepts
    /// the text; <c>false</c> when it is <c>null</c>, empty, or rejected by the parser.
    /// </returns>
    public static bool IsValidJson(string candidate)
    {
        if (string.IsNullOrEmpty(candidate))
        {
            return false;
        }

        try
        {
            // JsonDocument rents pooled buffers; dispose it so they are returned to the pool.
            using (JsonDocument.Parse(candidate))
            {
                return true;
            }
        }
        catch (JsonException)
        {
            return false;
        }
    }
}
|
||
} |