148 lines
6.1 KiB
Python
148 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
诊断脚本:验证图片 URL 和 Word 文档 URL 是否可访问
|
||
"""
|
||
|
||
import urllib.request
|
||
import requests
|
||
import sys
|
||
from typing import Dict, List, Tuple
|
||
|
||
|
||
def check_url(url: str, headers: Dict[str, str] = None, timeout: int = 10) -> Tuple[bool, str, int]:
    """Check whether a URL is reachable and does not serve an HTML page.

    Opens the URL with ``urllib.request`` and reads only the first 100 bytes
    of the body. A response that looks like an HTML page is reported as
    invalid, because the URLs being diagnosed are expected to be images or
    Word documents; an HTML body is treated as an error page.

    Args:
        url: URL to check.
        headers: HTTP request headers. When ``None``, a browser-like
            ``User-Agent`` and ``Accept: */*`` are sent.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        A ``(is_valid, message, status_code)`` tuple. ``status_code`` is the
        value reported by the response (or by the ``HTTPError``), and ``0``
        when the request failed with a network error or any other exception.
    """
    if headers is None:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': '*/*',
        }

    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as response:
            status_code = response.getcode()
            content_type = response.headers.get('Content-Type', '')

            # Only a short prefix is needed to sniff the payload; lowercase
            # it once so every marker test below is case-insensitive.
            preview = response.read(100).lower()

            # Detect HTML error pages. The declared Content-Type is checked
            # as well as the body prefix, because markup may start after the
            # first 100 bytes. The doctype test is restricted to
            # "<!doctype html" so other doctypes (e.g. SVG) are not flagged.
            is_html = (
                'text/html' in content_type.lower()
                or b'<html' in preview
                or b'<!doctype html' in preview
            )
            if is_html:
                if b'404' in preview:
                    error_msg = "返回 404 错误页面"
                elif b'403' in preview:
                    error_msg = "返回 403 禁止访问页面"
                else:
                    error_msg = f"返回 HTML 页面(状态码: {status_code})"
                return (False, error_msg, status_code)

            # Classify the payload by its declared content type.
            if 'image' in content_type:
                return (True, f"图片 - {content_type}", status_code)
            if (
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document' in content_type
                or 'application/msword' in content_type
            ):
                return (True, f"Word 文档 - {content_type}", status_code)
            return (True, f"其他类型 - {content_type}", status_code)

    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        status_code = e.code
        known_errors = {
            404: "404 Not Found - 资源不存在",
            403: "403 Forbidden - 禁止访问",
            401: "401 Unauthorized - 需要认证",
        }
        error_msg = known_errors.get(status_code, f"HTTP 错误 {status_code}")
        return (False, error_msg, status_code)
    except urllib.error.URLError as e:
        return (False, f"网络错误: {str(e)[:100]}", 0)
    except Exception as e:
        # Diagnostic boundary: report anything unexpected (malformed URL,
        # read timeout, ...) as a failed check instead of crashing the run.
        return (False, f"未知错误: {str(e)[:100]}", 0)
|
||
|
||
|
||
def main():
    """Check the hard-coded image and Word document URLs and print a report.

    Each image URL and the document URL is passed to ``check_url``; the
    per-URL result and an image summary are printed. The process exits with
    status 1 when any image URL or the document URL fails the check, and
    with status 0 otherwise.
    """
    # URLs under test.
    image_urls = [
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/18/69baa4f5-4826-4901-00e1-c6e66f02947f.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c344ba-4826-4901-00e1-c6ff235a12b2.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c344ba-4826-4901-00e1-c7055a396422.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c344ba-4826-4901-00e1-c6fe7f9c07ef.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c344ba-4826-4901-00e1-c70052d895e3.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c344ba-4826-4901-00e1-c6fc6ca7c4bf.png?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c33f31-4826-4901-00e1-c6fb50697e06.png?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/23/69c1029b-4826-4901-00e1-c6f614bc06d9.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/25/69c344ba-4826-4901-00e1-c6fd479e0a0e.jpg?x-oss-process=image/resize,w_1000",
        "https://dpcclass.oss-cn-beijing.aliyuncs.com/umsupload/2026/03/23/69c1029b-4826-4901-00e1-c6f569b31a14.jpeg?x-oss-process=image/resize,w_1000",
    ]

    doc_url = "https://dpc-oss.23544.com/umsupload/2026/03/25/69c353d0-4826-4901-00e1-c7081bcab988.docx"

    separator = "=" * 80

    print(separator)
    print("图片 URL 诊断")
    print(separator)

    valid_count = 0
    invalid_count = 0

    for i, url in enumerate(image_urls, 1):
        is_valid, msg, status_code = check_url(url)
        status = "✅ 有效" if is_valid else "❌ 无效"
        print(f"{i}. {status} - {msg}")
        print(f" URL: {url}")
        print(f" 状态码: {status_code}")
        print()

        if is_valid:
            valid_count += 1
        else:
            invalid_count += 1

    # The summary covers the image URLs only; the document is reported below.
    print(separator)
    print(f"汇总: {valid_count} 个有效,{invalid_count} 个无效")
    print(separator)
    print()

    print(separator)
    print("Word 文档 URL 诊断")
    print(separator)

    doc_is_valid, msg, status_code = check_url(doc_url)
    status = "✅ 有效" if doc_is_valid else "❌ 无效"
    print(f"{status} - {msg}")
    print(f"URL: {doc_url}")
    print(f"状态码: {status_code}")
    print()

    # The document result must take part in the final verdict; otherwise a
    # failing document URL would still be reported as "all URLs reachable"
    # with exit status 0.
    if invalid_count > 0 or not doc_is_valid:
        print(separator)
        print("⚠️ 警告:部分 URL 无法访问")
        print(separator)
        print("可能的原因:")
        print("1. URL 已过期(阿里云 OSS 签名 URL 有时效性)")
        print("2. 访问权限不足(需要认证或 IP 白名单)")
        print("3. 网络连接问题(Docker 容器网络配置)")
        print("4. 文件已被删除或移动")
        print()
        print("建议:")
        print("- 检查这些 URL 是否在浏览器中可以访问")
        print("- 如果使用签名 URL,确保 URL 未过期")
        print("- 检查 Docker 容器的网络配置和 DNS 设置")
        print("- 考虑使用公开可访问的 URL 或配置 OSS 访问权限")
        sys.exit(1)
    else:
        print("✅ 所有 URL 均可访问")
        sys.exit(0)
|
||
|
||
|
||
# Run the diagnostics only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|