From f9a356669f3b04855f17987abd35ad231f4133b9 Mon Sep 17 00:00:00 2001 From: zhangquan Date: Tue, 31 Mar 2026 15:41:21 +0800 Subject: [PATCH] 1 --- .env | 114 ++++-- .env.example | 88 +++++ DEPLOYMENT.md | 369 ++++++++++++++++++ quick-start.sh | 128 ++++++ src/graphs/nodes/doc_extract_node.py | 50 ++- .../nodes/recognize_and_correct_node.py | 52 ++- 6 files changed, 731 insertions(+), 70 deletions(-) create mode 100644 .env.example create mode 100644 DEPLOYMENT.md create mode 100644 quick-start.sh diff --git a/.env b/.env index f833649..30a6341 100644 --- a/.env +++ b/.env @@ -1,56 +1,88 @@ -# ============================================ -# 初中物理作业批改工作流 - 环境变量配置示例 -# ============================================ -# 复制此文件为 .env 并填写实际值 -# cp .env.example .env +# 环境变量配置示例 +# 复制此文件为 .env 并填入实际值 -# ============================================ -# 必需配置 - 大语言模型 API -# ============================================ +# ==================== Coze API 配置 ==================== -# LLM API 密钥(从火山引擎或OpenAI获取) -LLM_API_KEY=eyJhbGciOiJSUzI1NiIsImtpZCI6ImRmZmU2NmYxLTg0MDMtNDc5Ni05ZmRhLTViMmJjZWExM2ViOCJ9.eyJpc3MiOiJodHRwczovL2FwaS5jb3plLmNuIiwiYXVkIjpbInRVaHJod1VDMmFHRmVSRjZBU01NNGxITjhoT01jOVA2Il0sImV4cCI6ODIxMDI2Njg3Njc5OSwiaWF0IjoxNzc0NjcwMTM1LCJzdWIiOiJzcGlmZmU6Ly9hcGkuY296ZS5jbi93b3JrbG9hZF9pZGVudGl0eS9pZDo3NjIxMzY5Mjg5OTI4MzQzNTY3Iiwic3JjIjoiaW5ib3VuZF9hdXRoX2FjY2Vzc190b2tlbl9pZDo3NjIyMTUwMTkzNTI1NjIwNzYyIn0.qIgzvuMGj976ekcmxZHIWlATn58PyPmFrsoYPeqTfSX4kdvkgeIXMEGR1NAX-OcOmBY_T8tvjcY2sxNDqRhLSQi-6teONHIYkTyXSrA_T5eJqaqrylFmzWPWzqX41LBsav5cyR0n4ffYzqLSd0-iAf8HdUyMEhVuTZuv-nGSpaQ-al98TqcrPtLqte71J1VbbAsjzFMrawTaSOe6WSiIQNe1qNDmsoyQpu_qdw5Sh_nMbPN8V5tjNFlX04pNV6O60M_Bqr1hDxAqY_fsUeECBhE5uG29cYawxc5oEb-xZF0vM_a0gvU5cw2jV1OktNXGJ6A2S9btRfyqoAc3fFqxmw 
-COZE_WORKLOAD_IDENTITY_API_KEY=eyJhbGciOiJSUzI1NiIsImtpZCI6ImRmZmU2NmYxLTg0MDMtNDc5Ni05ZmRhLTViMmJjZWExM2ViOCJ9.eyJpc3MiOiJodHRwczovL2FwaS5jb3plLmNuIiwiYXVkIjpbInRVaHJod1VDMmFHRmVSRjZBU01NNGxITjhoT01jOVA2Il0sImV4cCI6ODIxMDI2Njg3Njc5OSwiaWF0IjoxNzc0NjcwMTM1LCJzdWIiOiJzcGlmZmU6Ly9hcGkuY296ZS5jbi93b3JrbG9hZF9pZGVudGl0eS9pZDo3NjIxMzY5Mjg5OTI4MzQzNTY3Iiwic3JjIjoiaW5ib3VuZF9hdXRoX2FjY2Vzc190b2tlbl9pZDo3NjIyMTUwMTkzNTI1NjIwNzYyIn0.qIgzvuMGj976ekcmxZHIWlATn58PyPmFrsoYPeqTfSX4kdvkgeIXMEGR1NAX-OcOmBY_T8tvjcY2sxNDqRhLSQi-6teONHIYkTyXSrA_T5eJqaqrylFmzWPWzqX41LBsav5cyR0n4ffYzqLSd0-iAf8HdUyMEhVuTZuv-nGSpaQ-al98TqcrPtLqte71J1VbbAsjzFMrawTaSOe6WSiIQNe1qNDmsoyQpu_qdw5Sh_nMbPN8V5tjNFlX04pNV6O60M_Bqr1hDxAqY_fsUeECBhE5uG29cYawxc5oEb-xZF0vM_a0gvU5cw2jV1OktNXGJ6A2S9btRfyqoAc3fFqxmw +# Coze API 访问密钥(必需) +# 获取方式:https://www.coze.cn/ +COZE_API_KEY=Bearer pat_N0naC1MsQ5cLAQjtNB8pfzzyT0kZWXBSaOOLLsN03TYgChkDgnCXDyfUvc1A68I2 -# LLM API 基础URL -# 火山引擎: https://ark.cn-beijing.volces.com/api/v3 -# OpenAI: https://api.openai.com/v1 -LLM_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 +# 大语言模型 API 密钥(必需) +LLM_API_KEY=Bearer eyJhbGciOiJSUzI1NiIsImtpZCI6IjhiNmUwZWIwLTU2MGItNDFjMi1hODY4LTlmMzI4Y2FhZjAzNSJ9.eyJpc3MiOiJodHRwczovL2FwaS5jb3plLmNuIiwiYXVkIjpbIkZVS1kzOVR0dFlSdmlNaldGVmNjaUg0NWFPblp2TGxpIl0sImV4cCI6ODIxMDI2Njg3Njc5OSwiaWF0IjoxNzc0OTQyMTE5LCJzdWIiOiJzcGlmZmU6Ly9hcGkuY296ZS5jbi93b3JrbG9hZF9pZGVudGl0eS9pZDo3NjIyMjM4NzUyNjQyOTU3MzQ3Iiwic3JjIjoiaW5ib3VuZF9hdXRoX2FjY2Vzc190b2tlbl9pZDo3NjIzMzE4MzU0OTE2Mjc4MzA2In0.UHvct0nIH6hnKaHT2z3bfvLeNcyrgrnqYE3itr9ddl1mGHS0bAtGQ7lDCZwObCPrA_TqnaaAS8dW20ctpTd8Fwyr0nl-eKylKH40BvJ8DbsZdKbqKQgl3yxM14Bmy1VjfNyKRyVhE11RfOIAk_xgRS0k2lj9tyJNv0NFSNUPSFVA8ApNUXTALy8YdqnIbcfhWroCQTR9RkqsRuJ7GpMrODHxZcdYCXj2XF1nyeK68HCUB_2XcGFqccWkLQxMa9Y7mezkatT1txzQcP7gdN01Y2KwFkjxnpIv8qVVyB3UzgNDsEqqZWEh8ft-qMgPDcNc-YCB8r7-zxmMolcCp6fhlA -# 模型名称 -# 火山引擎推荐: doubao-seed-2-0-pro-260215 -# OpenAI推荐: gpt-4o -LLM_MODEL_NAME=doubao-seed-2-0-pro-260215 +# 工作负载身份认证密钥(必需) 
+COZE_WORKLOAD_IDENTITY_API_KEY=Bearer eyJhbGciOiJSUzI1NiIsImtpZCI6IjhiNmUwZWIwLTU2MGItNDFjMi1hODY4LTlmMzI4Y2FhZjAzNSJ9.eyJpc3MiOiJodHRwczovL2FwaS5jb3plLmNuIiwiYXVkIjpbIkZVS1kzOVR0dFlSdmlNaldGVmNjaUg0NWFPblp2TGxpIl0sImV4cCI6ODIxMDI2Njg3Njc5OSwiaWF0IjoxNzc0OTQyMTE5LCJzdWIiOiJzcGlmZmU6Ly9hcGkuY296ZS5jbi93b3JrbG9hZF9pZGVudGl0eS9pZDo3NjIyMjM4NzUyNjQyOTU3MzQ3Iiwic3JjIjoiaW5ib3VuZF9hdXRoX2FjY2Vzc190b2tlbl9pZDo3NjIzMzE4MzU0OTE2Mjc4MzA2In0.UHvct0nIH6hnKaHT2z3bfvLeNcyrgrnqYE3itr9ddl1mGHS0bAtGQ7lDCZwObCPrA_TqnaaAS8dW20ctpTd8Fwyr0nl-eKylKH40BvJ8DbsZdKbqKQgl3yxM14Bmy1VjfNyKRyVhE11RfOIAk_xgRS0k2lj9tyJNv0NFSNUPSFVA8ApNUXTALy8YdqnIbcfhWroCQTR9RkqsRuJ7GpMrODHxZcdYCXj2XF1nyeK68HCUB_2XcGFqccWkLQxMa9Y7mezkatT1txzQcP7gdN01Y2KwFkjxnpIv8qVVyB3UzgNDsEqqZWEh8ft-qMgPDcNc-YCB8r7-zxmMolcCp6fhlA -# 注意:不需要配置对象存储(S3/TOS/OSS等) -# 图片直接使用原始URL,不上传存储 +# 集成 API 密钥(必需) +COZE_INTEGRATION_API_KEY=Bearer eyJhbGciOiJSUzI1NiIsImtpZCI6IjhiNmUwZWIwLTU2MGItNDFjMi1hODY4LTlmMzI4Y2FhZjAzNSJ9.eyJpc3MiOiJodHRwczovL2FwaS5jb3plLmNuIiwiYXVkIjpbIkZVS1kzOVR0dFlSdmlNaldGVmNjaUg0NWFPblp2TGxpIl0sImV4cCI6ODIxMDI2Njg3Njc5OSwiaWF0IjoxNzc0OTQyMTE5LCJzdWIiOiJzcGlmZmU6Ly9hcGkuY296ZS5jbi93b3JrbG9hZF9pZGVudGl0eS9pZDo3NjIyMjM4NzUyNjQyOTU3MzQ3Iiwic3JjIjoiaW5ib3VuZF9hdXRoX2FjY2Vzc190b2tlbl9pZDo3NjIzMzE4MzU0OTE2Mjc4MzA2In0.UHvct0nIH6hnKaHT2z3bfvLeNcyrgrnqYE3itr9ddl1mGHS0bAtGQ7lDCZwObCPrA_TqnaaAS8dW20ctpTd8Fwyr0nl-eKylKH40BvJ8DbsZdKbqKQgl3yxM14Bmy1VjfNyKRyVhE11RfOIAk_xgRS0k2lj9tyJNv0NFSNUPSFVA8ApNUXTALy8YdqnIbcfhWroCQTR9RkqsRuJ7GpMrODHxZcdYCXj2XF1nyeK68HCUB_2XcGFqccWkLQxMa9Y7mezkatT1txzQcP7gdN01Y2KwFkjxnpIv8qVVyB3UzgNDsEqqZWEh8ft-qMgPDcNc-YCB8r7-zxmMolcCp6fhlA +# 工作空间 ID(必需) +COZE_WORKSPACE_ID=7622233047848583202 -# ============================================ -# 可选配置 - 日志与缓存 -# ============================================ +# ==================== Coze API 基础 URL ==================== -# 日志级别: DEBUG, INFO, WARNING, ERROR +# Coze 集成基础 URL +COZE_INTEGRATION_BASE_URL=https://api.coze.cn/v1 + +# Coze 模型基础 URL 
+COZE_INTEGRATION_MODEL_BASE_URL=https://api.coze.cn/v1 + +# LLM 基础 URL +LLM_BASE_URL=https://api.coze.cn/v1 + +# ==================== 模型配置 ==================== + +# 默认文本模型 +LLM_MODEL_NAME=doubao-seed-1-8-251228 + +# 默认视觉模型 +VISION_MODEL_NAME=doubao-seed-1-6-vision-250815 + +# ==================== 服务配置 ==================== + +# 服务端口 +PORT=8000 + +# 工作线程数 +WORKERS=1 + +# 超时时间(秒) +TIMEOUT_SECONDS=600 + +# ==================== 缓存配置 ==================== + +# 缓存目录 +CACHE_DIR=/tmp/homework_cache + +# 答案文档缓存目录 +ANSWER_DOC_CACHE_DIR=/tmp/homework_cache/math_answer_doc + +# 评分标准缓存目录 +GRADE_STANDARDS_CACHE_DIR=/tmp/homework_cache/grade_standards + +# ==================== 并发配置 ==================== + +# 最大并发数(1-50) +MAX_CONCURRENT_TASKS=10 + +# ==================== 日志配置 ==================== + +# 日志级别 LOG_LEVEL=INFO -# 缓存目录(默认: /tmp/cache) -CACHE_DIR=/tmp/cache +# 日志文件路径 +LOG_FILE=/tmp/work/logs/bypass/app.log -# 单张图片处理超时(秒,默认: 120) -SINGLE_IMAGE_TIMEOUT=120 +# 日志最大大小(字节) +LOG_MAX_BYTES=104857600 +# 日志备份数量 +LOG_BACKUP_COUNT=5 -# ============================================ -# 可选配置 - 并发控制 -# ============================================ +# ==================== 其他配置 ==================== -# 最大并发数(默认: 10) -MAX_CONCURRENT=10 +# Coze 项目环境(DEV/PROD) +COZE_PROJECT_ENV=PROD - -# ============================================ -# 工作目录(系统自动设置,无需修改) -# ============================================ - -# 工作目录路径(由系统自动设置) -# COZE_WORKSPACE_PATH=/workspace/projects +# 启用调试模式 +DEBUG=false diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..f8973bc --- /dev/null +++ b/.env.example @@ -0,0 +1,88 @@ +# 环境变量配置示例 +# 复制此文件为 .env 并填入实际值 + +# ==================== Coze API 配置 ==================== + +# Coze API 访问密钥(必需) +# 获取方式:https://www.coze.cn/ +COZE_API_KEY=Bearer YOUR_COZE_API_KEY_HERE + +# 大语言模型 API 密钥(必需) +LLM_API_KEY=Bearer YOUR_LLM_API_KEY_HERE + +# 工作负载身份认证密钥(必需) +COZE_WORKLOAD_IDENTITY_API_KEY=Bearer YOUR_WORKLOAD_IDENTITY_KEY_HERE + +# 集成 API 密钥(必需) 
+COZE_INTEGRATION_API_KEY=Bearer YOUR_INTEGRATION_API_KEY_HERE + +# 工作空间 ID(必需) +COZE_WORKSPACE_ID=YOUR_WORKSPACE_ID_HERE + +# ==================== Coze API 基础 URL ==================== + +# Coze 集成基础 URL +COZE_INTEGRATION_BASE_URL=https://api.coze.cn/v1 + +# Coze 模型基础 URL +COZE_INTEGRATION_MODEL_BASE_URL=https://api.coze.cn/v1 + +# LLM 基础 URL +LLM_BASE_URL=https://api.coze.cn/v1 + +# ==================== 模型配置 ==================== + +# 默认文本模型 +LLM_MODEL_NAME=doubao-seed-1-8-251228 + +# 默认视觉模型 +VISION_MODEL_NAME=doubao-seed-1-6-vision-250815 + +# ==================== 服务配置 ==================== + +# 服务端口 +PORT=8000 + +# 工作线程数 +WORKERS=1 + +# 超时时间(秒) +TIMEOUT_SECONDS=600 + +# ==================== 缓存配置 ==================== + +# 缓存目录 +CACHE_DIR=/tmp/homework_cache + +# 答案文档缓存目录 +ANSWER_DOC_CACHE_DIR=/tmp/homework_cache/math_answer_doc + +# 评分标准缓存目录 +GRADE_STANDARDS_CACHE_DIR=/tmp/homework_cache/grade_standards + +# ==================== 并发配置 ==================== + +# 最大并发数(1-50) +MAX_CONCURRENT_TASKS=10 + +# ==================== 日志配置 ==================== + +# 日志级别 +LOG_LEVEL=INFO + +# 日志文件路径 +LOG_FILE=/tmp/work/logs/bypass/app.log + +# 日志最大大小(字节) +LOG_MAX_BYTES=104857600 + +# 日志备份数量 +LOG_BACKUP_COUNT=5 + +# ==================== 其他配置 ==================== + +# Coze 项目环境(DEV/PROD) +COZE_PROJECT_ENV=PROD + +# 启用调试模式 +DEBUG=false diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..0b379a4 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,369 @@ +# 数学作业批改系统 - 部署指南 + +## 📋 项目概述 + +这是一个基于 LangGraph 的初中数学作业批改工作流系统,支持: +- ✅ 图片上传识别 +- ✅ 自动批改 +- ✅ 坐标定位 +- ✅ 多学生多图片并行处理 +- ✅ Word 文档答案解析 +- ✅ 流式响应 + +--- + +## 🔧 前置条件 + +### 1. **Docker 环境** + +```bash +# 安装 Docker +# Ubuntu/Debian +sudo apt-get update +sudo apt-get install -y docker.io docker-compose + +# CentOS/RHEL +sudo yum install -y docker docker-compose + +# macOS +brew install docker docker-compose + +# 启动 Docker +sudo systemctl start docker +sudo systemctl enable docker +``` + +### 2. **API 密钥**(重要!) 
+ +项目依赖 Coze API,需要以下密钥: + +| 环境变量 | 说明 | 获取方式 | +|---------|------|---------| +| `COZE_API_KEY` | Coze API 访问密钥 | [Coze 控制台](https://www.coze.cn/) | +| `LLM_API_KEY` | 大语言模型 API 密钥 | [Coze 控制台](https://www.coze.cn/) | +| `COZE_WORKLOAD_IDENTITY_API_KEY` | 工作负载身份认证密钥 | [Coze 控制台](https://www.coze.cn/) | +| `COZE_INTEGRATION_API_KEY` | 集成 API 密钥 | [Coze 控制台](https://www.coze.cn/) | +| `COZE_WORKSPACE_ID` | 工作空间 ID | [Coze 控制台](https://www.coze.cn/) | + +**获取密钥步骤**: +1. 注册/登录 [Coze 平台](https://www.coze.cn/) +2. 创建工作空间 +3. 在 API 密钥管理中获取相关密钥 + +### 3. **网络要求** + +- ✅ 可访问 `api.coze.cn`(中国区) +- ✅ 可访问阿里云 OSS(用于存储图片/答案文档) +- ✅ 防火墙允许出站 HTTPS 连接(443 端口) + +### 4. **系统资源要求** + +| 资源 | 最低配置 | 推荐配置 | +|------|---------|---------| +| CPU | 2 核 | 4 核+ | +| 内存 | 4 GB | 8 GB+ | +| 磁盘 | 10 GB | 20 GB+ | +| 网络 | 1 Mbps | 10 Mbps+ | + +--- + +## 🚀 部署步骤 + +### 方式一:使用 Dockerfile(推荐) + +#### 1. 克隆/下载项目 + +```bash +# 方式一:使用 git clone +git clone <repository-url> +cd <project-directory> + +# 方式二:下载压缩包并解压 +unzip <project-archive>.zip +cd <project-directory> +``` + +#### 2. 修改 API 密钥 + +编辑 `assets/Dockerfile`,替换第 47-51 行的环境变量: + +```dockerfile +ENV COZE_WORKLOAD_IDENTITY_API_KEY="Bearer YOUR_KEY_HERE" \ + LLM_API_KEY="Bearer YOUR_KEY_HERE" \ + COZE_API_KEY="Bearer YOUR_KEY_HERE" \ + COZE_INTEGRATION_API_KEY="Bearer YOUR_KEY_HERE" \ + COZE_WORKSPACE_ID=YOUR_WORKSPACE_ID +``` + +⚠️ **注意**:请将 `YOUR_KEY_HERE` 和 `YOUR_WORKSPACE_ID` 替换为你的实际密钥;填入真实密钥后的 Dockerfile 切勿提交到代码仓库。 + +#### 3. 构建 Docker 镜像 + +```bash +docker build -f assets/Dockerfile -t math-correction:latest . +``` + +**构建时间**:约 5-10 分钟(首次构建较慢) + +#### 4. 运行容器 + +```bash +docker run -d \ + --name math-correction \ + -p 8000:8000 \ + -v /path/to/data:/app/data \ + math-correction:latest +``` + +**参数说明**: +- `-d`:后台运行 +- `--name`:容器名称 +- `-p 8000:8000`:端口映射(主机:容器) +- `-v /path/to/data:/app/data`:数据卷挂载(可选) + +#### 5. 
验证运行 + +```bash +# 查看容器日志 +docker logs -f math-correction + +# 检查健康状态 +curl http://localhost:8000/health + +# 测试接口 +curl -X POST http://localhost:8000/run \ + -H "Content-Type: application/json" \ + -d '{"student_homework": []}' +``` + +--- + +### 方式二:使用 Docker Compose(更方便) + +#### 1. 创建 `docker-compose.yml` + +```yaml +version: '3.8' + +services: + math-correction: + build: + context: . + dockerfile: assets/Dockerfile + container_name: math-correction + ports: + - "8000:8000" + environment: + # 替换为你的实际密钥 + - COZE_WORKLOAD_IDENTITY_API_KEY=Bearer YOUR_KEY_HERE + - LLM_API_KEY=Bearer YOUR_KEY_HERE + - COZE_API_KEY=Bearer YOUR_KEY_HERE + - COZE_INTEGRATION_API_KEY=Bearer YOUR_KEY_HERE + - COZE_WORKSPACE_ID=YOUR_WORKSPACE_ID + volumes: + - ./data:/app/data + restart: unless-stopped +``` + +#### 2. 启动服务 + +```bash +# 构建并启动 +docker-compose up -d + +# 查看日志 +docker-compose logs -f + +# 停止服务 +docker-compose down +``` + +--- + +## 📝 API 接口说明 + +### 1. **HTTP 模式**(同步/异步) + +```bash +# 端点 +POST http://localhost:8000/run +POST http://localhost:8000/stream_run + +# 请求头 +Content-Type: application/json + +# 请求体示例 +{ + "student_homework": [ + { + "student_id": 1, + "student_name": "张三", + "homework_images": [ + "https://example.com/homework1.jpg", + "https://example.com/homework2.jpg" + ] + } + ], + "answer_doc_url": "https://example.com/answers.docx" +} +``` + +### 2. **流式响应模式** + +```bash +# 端点 +POST http://localhost:8000/stream_run + +# 响应格式(SSE) +data: {"event": "workflow_start", "data": {...}} +data: {"event": "ping", "data": {...}} +data: {"event": "workflow_end", "data": {...}} +``` + +--- + +## 🔍 常见问题排查 + +### 1. **构建失败** + +```bash +# 清理 Docker 缓存后重新构建 +docker system prune -a +docker build --no-cache -f assets/Dockerfile -t math-correction:latest . +``` + +### 2. **容器启动失败** + +```bash +# 查看详细错误日志 +docker logs math-correction + +# 进入容器调试 +docker exec -it math-correction bash +``` + +### 3. **API 调用 401/403 错误** + +**原因**:API 密钥无效或过期 + +**解决**: +1. 
检查 `Dockerfile` 中的密钥是否正确 +2. 确认密钥是否有效且未过期 +3. 重新构建镜像(`docker build ...`) + +### 4. **图片下载失败** + +**原因**:图片 URL 不可访问 + +**解决**: +1. 检查图片 URL 是否有效 +2. 确认网络是否正常 +3. 检查防火墙设置 + +### 5. **内存不足** + +**现象**:容器频繁重启或 OOM + +**解决**: +```bash +# 增加 Docker 内存限制 +docker run -d \ + --name math-correction \ + --memory="8g" \ + -p 8000:8000 \ + math-correction:latest +``` + +--- + +## 📊 监控与日志 + +### 查看实时日志 + +```bash +# 容器日志 +docker logs -f math-correction + +# 查看最近 100 行 +docker logs --tail 100 math-correction + +# 查看错误日志(合并 stderr 后再过滤) +docker logs math-correction 2>&1 | grep ERROR +``` + +### 性能监控 + +```bash +# 查看容器资源使用 +docker stats math-correction + +# 查看容器详情 +docker inspect math-correction +``` + +--- + +## 🛠️ 本地开发模式 + +如果不想使用 Docker,可以直接在本地运行: + +```bash +# 使用 Python 3.12 创建并激活虚拟环境 +python3.12 -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate + +# 安装依赖 +pip install -r requirements.txt + +# 设置环境变量(也可写入 .env 文件;全部必需变量见上文“API 密钥”表) +export COZE_API_KEY="Bearer YOUR_KEY_HERE" +export LLM_API_KEY="Bearer YOUR_KEY_HERE" +export COZE_WORKSPACE_ID=YOUR_WORKSPACE_ID + +# 运行 +python src/main.py -m http -p 8000 +``` + +--- + +## 📚 相关文档 + +- [LangGraph 官方文档](https://langchain-ai.github.io/langgraph/) +- [Coze API 文档](https://www.coze.cn/docs/developer_guides) +- [项目结构说明](./AGENTS.md) + +--- + +## ⚠️ 注意事项 + +1. **API 密钥安全**: + - ❌ 不要将密钥提交到代码仓库 + - ✅ 使用环境变量或密钥管理服务 + +2. **图片存储**: + - 推荐使用对象存储(如阿里云 OSS) + - 确保 URL 可公网访问 + +3. **并发限制**: + - 单图片超时:120 秒 + - 建议并发数:10-50(根据服务器配置调整) + +4. **数据隔离**: + - 不同学科的缓存目录独立 + - 建议定期清理缓存 + +--- + +## 🆘 技术支持 + +如遇问题,请提供以下信息: +1. Docker 版本:`docker --version` +2. 容器日志:`docker logs math-correction` +3. 错误信息截图 +4. 
系统配置:`uname -a` + +--- + +## 📄 许可证 + +请遵循相关项目的许可证条款。 diff --git a/quick-start.sh b/quick-start.sh new file mode 100644 index 0000000..97b2e35 --- /dev/null +++ b/quick-start.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +# 数学作业批改系统 - 快速部署脚本 + +set -e + +echo "==========================================" +echo " 数学作业批改系统 - 快速部署" +echo "==========================================" +echo "" + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# 检查 Docker +if ! command -v docker &> /dev/null; then + echo -e "${RED}✗ Docker 未安装${NC}" + echo "请先安装 Docker: https://docs.docker.com/get-docker/" + exit 1 +fi + +echo -e "${GREEN}✓ Docker 已安装${NC}" +echo "" + +# 检查 API 密钥 +check_env_vars() { + local missing_vars=() + + # 从 Dockerfile 中提取环境变量 + if grep -q "YOUR_KEY_HERE\|YOUR_WORKSPACE_ID" assets/Dockerfile; then + echo -e "${YELLOW}⚠ 警告:检测到未配置的 API 密钥${NC}" + echo "" + echo "请在 assets/Dockerfile 中配置以下环境变量:" + echo " 1. COZE_WORKLOAD_IDENTITY_API_KEY" + echo " 2. LLM_API_KEY" + echo " 3. COZE_API_KEY" + echo " 4. COZE_INTEGRATION_API_KEY" + echo " 5. COZE_WORKSPACE_ID" + echo "" + read -p "是否继续构建?(将无法正常使用 LLM 功能)[y/N] " -n 1 -r + echo "" + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo -e "${RED}部署已取消${NC}" + exit 1 + fi + fi +} + +check_env_vars + +# 构建镜像 +echo -e "${YELLOW}正在构建 Docker 镜像...${NC}" +echo "这可能需要 5-10 分钟,请耐心等待..." +echo "" + +docker build -f assets/Dockerfile -t math-correction:latest . + +if [ $? -eq 0 ]; then + echo "" + echo -e "${GREEN}✓ 镜像构建成功!${NC}" + echo "" +else + echo "" + echo -e "${RED}✗ 镜像构建失败${NC}" + echo "请检查错误信息并重试" + exit 1 +fi + +# 运行容器 +echo -e "${YELLOW}正在启动容器...${NC}" +docker run -d \ + --name math-correction \ + -p 8000:8000 \ + --restart unless-stopped \ + math-correction:latest + +if [ $? 
-eq 0 ]; then + echo "" + echo -e "${GREEN}✓ 容器启动成功!${NC}" + echo "" +else + echo "" + echo -e "${RED}✗ 容器启动失败${NC}" + echo "请检查端口 8000 是否被占用" + exit 1 +fi + +# 等待服务启动 +echo -e "${YELLOW}等待服务启动...${NC}" +sleep 5 + +# 测试服务 +echo "" +echo -e "${YELLOW}正在测试服务...${NC}" +if curl -s http://localhost:8000/health > /dev/null 2>&1; then + echo -e "${GREEN}✓ 服务运行正常${NC}" +else + echo -e "${YELLOW}⚠ 服务可能还在启动中,请稍后测试${NC}" +fi + +# 显示容器信息 +echo "" +echo "==========================================" +echo " 部署完成!" +echo "==========================================" +echo "" +echo "容器信息:" +echo " 名称: math-correction" +echo " 端口: 8000" +echo " 状态: $(docker inspect --format='{{.State.Status}}' math-correction)" +echo "" +echo "常用命令:" +echo " 查看日志: docker logs -f math-correction" +echo " 停止容器: docker stop math-correction" +echo " 启动容器: docker start math-correction" +echo " 重启容器: docker restart math-correction" +echo " 删除容器: docker rm -f math-correction" +echo "" +echo "测试接口:" +echo " curl http://localhost:8000/health" +echo "" +echo "API 文档:" +echo " 请参考 DEPLOYMENT.md" +echo "" +echo -e "${GREEN}✓ 部署成功!${NC}" diff --git a/src/graphs/nodes/doc_extract_node.py b/src/graphs/nodes/doc_extract_node.py index 367a91d..cfc259e 100644 --- a/src/graphs/nodes/doc_extract_node.py +++ b/src/graphs/nodes/doc_extract_node.py @@ -251,20 +251,42 @@ def parse_answer_doc_with_llm(answer_doc_url: str, ctx, config: RunnableConfig) ] }}""" - client = LLMClient(ctx=ctx) - response = client.invoke( - messages=[HumanMessage(content=user_prompt)], - model=llm_config.get("model", "doubao-seed-2-0-pro-260215"), - temperature=llm_config.get("temperature", 0.1), - max_completion_tokens=llm_config.get("max_completion_tokens", 8192) - ) - - response_text = response.content if isinstance(response.content, str) else " ".join( - item.get("text", "") if isinstance(item, dict) else str(item) - for item in response.content - ).strip() - - logger.info(f"Word extract LLM response: {response_text[:1500]}") + # 3. 
调用LLM解析(带错误处理) + try: + client = LLMClient(ctx=ctx) + response = client.invoke( + messages=[HumanMessage(content=user_prompt)], + model=llm_config.get("model", "doubao-seed-1-8-251228"), + temperature=llm_config.get("temperature", 0.1), + max_completion_tokens=llm_config.get("max_completion_tokens", 8192) + ) + + # 安全提取响应文本 + if isinstance(response.content, str): + response_text = response.content.strip() + elif isinstance(response.content, list): + # 处理列表格式响应 + text_parts = [] + for item in response.content: + if isinstance(item, str): + text_parts.append(item) + elif isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text", "")) + response_text = " ".join(text_parts).strip() + else: + response_text = str(response.content).strip() + + # 检查响应是否为HTML错误页面 + if response_text.startswith("") or response_text.startswith("") or "404 Not Found" in response_text[:100]: + logger.error(f"LLM returned HTML error page instead of JSON: {response_text[:200]}") + raise ValueError(f"LLM API returned error: {response_text[:200]}") + + logger.info(f"Word extract LLM response: {response_text[:1500]}") + + except Exception as e: + logger.error(f"LLM parsing failed: {e}, will use teacher mode") + # 返回空列表,降级为老师批改模式 + return [] # 清理markdown标记 for prefix in ["```json", "```JSON", "```"]: diff --git a/src/graphs/nodes/recognize_and_correct_node.py b/src/graphs/nodes/recognize_and_correct_node.py index 309ab08..68e8fc2 100644 --- a/src/graphs/nodes/recognize_and_correct_node.py +++ b/src/graphs/nodes/recognize_and_correct_node.py @@ -263,21 +263,43 @@ def recognize_and_correct_node( ]) ] - client = LLMClient(ctx=ctx) - response = client.invoke( - messages=messages, - model=llm_config.get("model", "doubao-seed-2-0-pro-260215"), - temperature=llm_config.get("temperature", 0.0), - max_completion_tokens=llm_config.get("max_completion_tokens", 8192) - ) - - response_text = response.content if isinstance(response.content, str) else " ".join( - item.get("text", "") 
if isinstance(item, dict) else str(item) - for item in response.content - ).strip() - - # 只记录前500字符,避免日志过大 - logger.info(f"LLM response (first 500 chars): {response_text[:500]}") + # 4. 调用LLM批改(带错误处理) + try: + client = LLMClient(ctx=ctx) + response = client.invoke( + messages=messages, + model=llm_config.get("model", "doubao-seed-1-6-vision-250815"), + temperature=llm_config.get("temperature", 0.0), + max_completion_tokens=llm_config.get("max_completion_tokens", 8192) + ) + + # 安全提取响应文本 + if isinstance(response.content, str): + response_text = response.content.strip() + elif isinstance(response.content, list): + # 处理列表格式响应 + text_parts = [] + for item in response.content: + if isinstance(item, str): + text_parts.append(item) + elif isinstance(item, dict) and item.get("type") == "text": + text_parts.append(item.get("text", "")) + response_text = " ".join(text_parts).strip() + else: + response_text = str(response.content).strip() + + # 检查响应是否为HTML错误页面 + if response_text.startswith("") or response_text.startswith("") or "404 Not Found" in response_text[:100]: + logger.error(f"LLM returned HTML error page instead of JSON: {response_text[:200]}") + raise ValueError(f"LLM API returned error: {response_text[:200]}") + + # 只记录前500字符,避免日志过大 + logger.info(f"LLM response (first 500 chars): {response_text[:500]}") + + except Exception as e: + logger.error(f"LLM correction failed for image {image_url[:50]}...: {e}") + # 返回空结果,不影响其他图片处理 + return [] # 解析结果 result_dict = extract_json_from_text(response_text, "results")