using System;
using System.Collections.Generic;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using static System.Runtime.InteropServices.JavaScript.JSType;

namespace VideoAnalysisCore.Common
{
    /// <summary>
    /// Helpers for pulling JSON objects/arrays out of free-form text and for
    /// repairing backslash escapes that are not legal in JSON.
    /// </summary>
    public static class JsonExtractor
    {
        /// <summary>
        /// Repairs non-standard backslash escapes so the text conforms to the JSON grammar.
        /// Intended for non-standard JSON that embeds LaTeX (e.g. <c>\overrightarrow</c>, <c>\unit</c>).
        /// </summary>
        /// <remarks>
        /// Every run of backslashes is inspected. A run of odd length whose last backslash is
        /// not followed by a legal JSON escape gets one extra backslash appended, turning the
        /// dangling backslash into an escaped literal backslash. Commands that happen to start
        /// with a legal escape letter (e.g. <c>\frac</c>, <c>\theta</c>, <c>\nabla</c>) are left
        /// untouched because they are indistinguishable from <c>\f</c>, <c>\t</c>, <c>\n</c>.
        /// </remarks>
        /// <param name="json">Text to repair; may be <c>null</c> or empty.</param>
        /// <returns>The repaired text, or <paramref name="json"/> itself when it is <c>null</c> or empty.</returns>
        public static string ToSafeJsonString(this string json)
        {
            if (string.IsNullOrEmpty(json))
            {
                return json;
            }

            int len = json.Length;

            // Reserve ~10% headroom so the occasional inserted backslash does not force a regrow.
            var sb = new StringBuilder(len + (len / 10));

            int i = 0;
            while (i < len)
            {
                char c = json[i];
                if (c != '\\')
                {
                    sb.Append(c);
                    i++;
                    continue;
                }

                // Consume the whole run of consecutive backslashes.
                int start = i;
                while (i < len && json[i] == '\\')
                {
                    i++;
                }

                int count = i - start;

                // An even run is made of self-contained "\\" pairs. Only an odd run leaves a
                // trailing backslash that escapes whatever follows, so only then is the
                // following character checked.
                bool hasDanglingBackslash = count % 2 != 0;
                if (hasDanglingBackslash && (i >= len || !IsValidJsonEscape(json, i)))
                {
                    // Illegal (or truncated) escape: pad the run to an even length.
                    count++;
                }

                sb.Append('\\', count);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Tells whether the character at <paramref name="nextCharIndex"/> (the one right after a
        /// backslash) starts a legal JSON escape sequence.
        /// </summary>
        /// <param name="text">Text being scanned.</param>
        /// <param name="nextCharIndex">Index of the character following the backslash; must be inside <paramref name="text"/>.</param>
        /// <returns><c>true</c> for <c>" \ / b f n r t</c>, or <c>u</c> followed by exactly four hex digits.</returns>
        private static bool IsValidJsonEscape(string text, int nextCharIndex)
        {
            switch (text[nextCharIndex])
            {
                // Simple single-character escapes defined by the JSON grammar.
                case '"':
                case '\\':
                case '/':
                case 'b':
                case 'f':
                case 'n':
                case 'r':
                case 't':
                    return true;

                // Unicode escape: \uXXXX requires four hex digits after the 'u'.
                case 'u':
                    if (nextCharIndex + 4 >= text.Length)
                    {
                        return false;
                    }

                    for (int k = 1; k <= 4; k++)
                    {
                        char hex = text[nextCharIndex + k];
                        bool isHex = (hex >= '0' && hex <= '9')
                                     || (hex >= 'a' && hex <= 'f')
                                     || (hex >= 'A' && hex <= 'F');
                        if (!isHex)
                        {
                            return false;
                        }
                    }

                    return true;

                default:
                    return false;
            }
        }

        /// <summary>
        /// Extracts every top-level JSON object or array embedded in <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// The input is first passed through <see cref="ToSafeJsonString"/>, so the returned
        /// fragments are substrings of the repaired text. Brackets are balanced while skipping
        /// string literals, and each balanced span is kept only if <see cref="IsValidJson"/>
        /// accepts it. A closing bracket that has no matching opener (stray punctuation in the
        /// surrounding prose) is ignored, or abandons the current candidate, instead of
        /// corrupting the depth counters for the rest of the scan.
        /// </remarks>
        /// <param name="input">Arbitrary text that may contain JSON; may be <c>null</c>.</param>
        /// <returns>The valid JSON fragments in order of appearance; empty when none are found.</returns>
        public static List<string> ExtractJsonStrings(this string input)
        {
            input = input.ToSafeJsonString();
            var results = new List<string>();
            if (string.IsNullOrWhiteSpace(input))
            {
                return results;
            }

            int braceCount = 0;     // depth of '{' inside the current candidate
            int bracketCount = 0;   // depth of '[' inside the current candidate
            int startIndex = -1;    // start of the current candidate, -1 when none is open
            bool inString = false;
            bool isEscaped = false;

            for (int i = 0; i < input.Length; i++)
            {
                char c = input[i];

                // 1. The character after a backslash is consumed verbatim (e.g. \").
                if (isEscaped)
                {
                    isEscaped = false;
                    continue;
                }

                if (c == '\\')
                {
                    isEscaped = true;
                    continue;
                }

                // 2. Unescaped quotes toggle string mode.
                if (c == '"')
                {
                    inString = !inString;
                    continue;
                }

                // 3. Brackets inside a string literal are plain text.
                if (inString)
                {
                    continue;
                }

                // 4. Opening of an object or array.
                if (c == '{' || c == '[')
                {
                    if (braceCount == 0 && bracketCount == 0)
                    {
                        startIndex = i;
                    }

                    if (c == '{')
                    {
                        braceCount++;
                    }
                    else
                    {
                        bracketCount++;
                    }
                }
                // 5. Closing of an object or array.
                else if (c == '}' || c == ']')
                {
                    // Stray closer outside any candidate: ignore it so the counters never go
                    // negative and block later fragments from being recognised.
                    if (startIndex == -1)
                    {
                        continue;
                    }

                    if (c == '}')
                    {
                        braceCount--;
                    }
                    else
                    {
                        bracketCount--;
                    }

                    // Closer without a matching opener inside the candidate: the candidate can
                    // never be valid JSON, so drop it and resume scanning with a clean state.
                    if (braceCount < 0 || bracketCount < 0)
                    {
                        braceCount = 0;
                        bracketCount = 0;
                        startIndex = -1;
                        continue;
                    }

                    if (braceCount == 0 && bracketCount == 0)
                    {
                        string potentialJson = input.Substring(startIndex, i - startIndex + 1);
                        if (IsValidJson(potentialJson))
                        {
                            results.Add(potentialJson);
                        }

                        startIndex = -1;
                    }
                }
            }

            return results;
        }

        /// <summary>
        /// Checks whether <paramref name="candidate"/> parses as a single JSON value.
        /// </summary>
        /// <param name="candidate">Text to validate; <c>null</c> or empty yields <c>false</c>.</param>
        /// <returns><c>true</c> when <see cref="JsonDocument.Parse(string, JsonDocumentOptions)"/> accepts the text.</returns>
        public static bool IsValidJson(string candidate)
        {
            if (string.IsNullOrEmpty(candidate))
            {
                return false;
            }

            try
            {
                // JsonDocument rents pooled buffers; dispose it so they are returned promptly.
                using (JsonDocument.Parse(candidate))
                {
                    return true;
                }
            }
            catch (Exception ex) when (ex is JsonException || ex is ArgumentException)
            {
                // JsonException: malformed JSON. ArgumentException: text that cannot be
                // transcoded to UTF-8 (e.g. lone surrogates). Both simply mean "not valid".
                return false;
            }
        }
    }
}