import 'dart:async';
import 'dart:io';
import 'dart:math';
import 'dart:typed_data';

import 'package:flutter/foundation.dart';
import 'package:flutter/services.dart';
import 'package:path_provider/path_provider.dart';
import 'package:record/record.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart';

/// Lifecycle states reported by [SpeechRecognitionService].
enum RecognitionState {
  idle,
  processing,
  listening,
  error,
}

/// One finalized piece of recognized speech.
class RecognitionResult {
  final String text;
  final double confidence;
  final DateTime timestamp;

  const RecognitionResult({
    required this.text,
    required this.confidence,
    required this.timestamp,
  });

  /// Serializes this result; [timestamp] is written as an ISO-8601 string.
  Map<String, dynamic> toJson() => {
        'text': text,
        'confidence': confidence,
        'timestamp': timestamp.toIso8601String(),
      };
}

/// Streaming speech recognition backed by a Sherpa-ONNX transducer model.
///
/// Audio is captured as 16-bit PCM through an [AudioRecorder] stream, fed into
/// an [OnlineStream], and decoded on a periodic timer. Listeners are notified
/// whenever state, text, audio levels, history or the error message change.
class SpeechRecognitionService extends ChangeNotifier {
  static const _modelDirName =
      'sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23';
  static const _encoderFile = 'encoder-epoch-99-avg-1.int8.onnx';
  static const _decoderFile = 'decoder-epoch-99-avg-1.int8.onnx';
  static const _joinerFile = 'joiner-epoch-99-avg-1.int8.onnx';
  static const _tokensFile = 'tokens.txt';

  /// Files copied from the asset bundle (int8-quantized model + token table).
  static const _modelFiles = [
    _encoderFile,
    _decoderFile,
    _joinerFile,
    _tokensFile,
  ];

  /// How often buffered audio is decoded while recording.
  static const _decodeInterval = Duration(milliseconds: 200);

  /// Maximum number of entries kept in [audioLevels].
  static const _maxAudioLevels = 50;

  // Sherpa-ONNX handles. Both wrap native memory and must be released with
  // `free()`; dropping the Dart reference alone leaks them.
  OnlineRecognizer? _recognizer;
  OnlineStream? _stream;

  // Recording.
  final AudioRecorder _recorder = AudioRecorder();
  bool _isRecording = false;
  int _sampleRate = 16000;
  StreamSubscription<Uint8List>? _audioSubscription;
  Timer? _decodeTimer;

  // Observable state.
  RecognitionState _state = RecognitionState.idle;
  String _currentText = '';
  String _finalText = '';
  String _accumulatedText = '';
  final double _confidence = 0.0;
  final List<RecognitionResult> _history = [];
  final List<double> _audioLevels = [];
  String? _errorMessage;
  bool _isInitialized = false;
  bool _isDisposed = false;

  RecognitionState get state => _state;

  /// The partial (not yet finalized) text of the segment being spoken.
  String get currentText => _currentText;

  /// Mirrors [accumulatedText]; updated whenever a segment is finalized.
  String get finalText => _finalText;

  /// All finalized segments joined together.
  String get accumulatedText => _accumulatedText;

  /// Currently always `0.0`; this class never updates the value.
  double get confidence => _confidence;

  /// Finalized results, newest first.
  List<RecognitionResult> get history => List.unmodifiable(_history);

  /// Recent normalized input levels in `[0.0, 1.0]`, oldest first.
  List<double> get audioLevels => List.unmodifiable(_audioLevels);

  String? get errorMessage => _errorMessage;
  bool get isRecording => _isRecording;
  bool get isInitialized => _isInitialized;

  /// Copies the model out of the asset bundle and creates the recognizer.
  ///
  /// [modelPath] is the asset directory holding the model files.
  /// [tokensPath] is accepted for compatibility but is currently unused; the
  /// token table is always read from `tokens.txt` next to the model.
  /// [sampleRate] is the rate used for recording and passed along with every
  /// audio chunk handed to the recognizer.
  ///
  /// Returns `true` on success. On failure returns `false`, sets
  /// [errorMessage] and moves to [RecognitionState.error].
  Future<bool> initialize({
    String modelPath =
        'assets/models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23',
    String tokensPath = '',
    int sampleRate = 16000,
  }) async {
    try {
      if (sampleRate <= 0) {
        throw ArgumentError.value(sampleRate, 'sampleRate', 'must be positive');
      }

      // An active session holds a stream created by the old recognizer; end it
      // before that recognizer is replaced.
      if (_isRecording) {
        await stopRecognition();
      }

      _setState(RecognitionState.processing);

      final modelDir = await _prepareModelFiles(modelPath);

      final encoderPath = '$modelDir/$_encoderFile';
      final decoderPath = '$modelDir/$_decoderFile';
      final joinerPath = '$modelDir/$_joinerFile';
      final tokensFilePath = '$modelDir/$_tokensFile';

      for (final path in [
        encoderPath,
        decoderPath,
        joinerPath,
        tokensFilePath,
      ]) {
        if (!await File(path).exists()) {
          throw Exception('模型文件不完整');
        }
      }

      debugPrint('🚀 准备创建 OnlineRecognizer...');
      debugPrint('📁 编码器路径: $encoderPath');
      debugPrint('📁 解码器路径: $decoderPath');
      debugPrint('📁 连接器路径: $joinerPath');
      debugPrint('📁 词表路径: $tokensFilePath');

      try {
        // The native bindings must be loaded before any recognizer is built.
        initBindings();

        // Release handles from a previous initialize() before replacing them.
        _releaseStream();
        _releaseRecognizer();

        final config = OnlineRecognizerConfig(
          model: OnlineModelConfig(
            transducer: OnlineTransducerModelConfig(
              encoder: encoderPath,
              decoder: decoderPath,
              joiner: joinerPath,
            ),
            tokens: tokensFilePath,
          ),
        );

        _recognizer = OnlineRecognizer(config);
        _sampleRate = sampleRate;
        _isInitialized = true;
        debugPrint('✅ Sherpa-ONNX 识别器创建成功');
      } catch (e) {
        debugPrint('❌ 创建识别器失败: $e');
        throw Exception('Sherpa-ONNX 识别器初始化失败: $e');
      }

      debugPrint('✅ 语音识别服务初始化成功');
      debugPrint('📁 模型目录: $modelDir');
      debugPrint('🎤 采样率: ${sampleRate}Hz');

      _setState(RecognitionState.idle);
      _clearError();
      return true;
    } catch (e) {
      _setError('初始化失败: $e');
      _setState(RecognitionState.error);
      _isInitialized = false;
      return false;
    }
  }

  /// Copies the model files from [assetPath] into the documents directory.
  ///
  /// Files that already exist are left untouched. Returns the absolute path of
  /// the directory containing the copied files. Throws an [Exception] if any
  /// file cannot be copied.
  Future<String> _prepareModelFiles(String assetPath) async {
    final appDir = await getApplicationDocumentsDirectory();
    final modelDir = Directory('${appDir.path}/models/$_modelDirName');

    if (!await modelDir.exists()) {
      await modelDir.create(recursive: true);
    }

    debugPrint('开始复制模型文件到: ${modelDir.path}');

    for (final file in _modelFiles) {
      final assetFile = '$assetPath/$file';
      final targetFile = File('${modelDir.path}/$file');

      if (await targetFile.exists()) {
        debugPrint('⏭️ 文件已存在,跳过: $file');
        continue;
      }

      try {
        debugPrint('复制文件: $assetFile -> ${targetFile.path}');
        final data = await rootBundle.load(assetFile);
        // The ByteData may be a view into a larger buffer, so honour its
        // offset and length instead of dumping the whole backing buffer.
        await targetFile.writeAsBytes(
          data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes),
          flush: true,
        );
        debugPrint('✅ 复制成功: $file');
      } catch (e) {
        debugPrint('❌ 无法复制模型文件 $file: $e');
        // Remove a partially written file; otherwise the next run would see it
        // as "already exists" and skip the copy.
        try {
          if (await targetFile.exists()) {
            await targetFile.delete();
          }
        } catch (cleanupError) {
          debugPrint('⚠️ 无法清理不完整的模型文件 $file: $cleanupError');
        }
        throw Exception('模型文件复制失败: $file');
      }
    }

    debugPrint('模型文件准备完成,返回路径: ${modelDir.path}');
    return modelDir.path;
  }

  /// Starts microphone capture and live decoding.
  ///
  /// Returns `true` if recording is running when the call completes (including
  /// when it was already running), `false` if the service is not initialized,
  /// permission is denied, or starting the recorder fails.
  Future<bool> startRecognition() async {
    if (!_isInitialized) {
      _setError('请先初始化语音识别服务');
      return false;
    }

    if (_isRecording) {
      debugPrint('已经在录音中');
      return true;
    }

    try {
      _setState(RecognitionState.processing);

      // Check permission before allocating anything so a denial neither leaks
      // a stream nor leaves the state stuck at `processing`.
      if (!await _recorder.hasPermission()) {
        _setError('没有录音权限');
        _setState(RecognitionState.error);
        return false;
      }

      _releaseStream();
      final recognizer = _recognizer;
      if (recognizer != null) {
        _stream = recognizer.createStream();
        debugPrint('✅ 创建识别流成功');
      } else {
        // TODO: remove this branch once the real API is integrated.
        debugPrint('⚠️ 等待真实 API 集成');
      }

      final recordStream = await _recorder.startStream(
        RecordConfig(
          encoder: AudioEncoder.pcm16bits,
          sampleRate: _sampleRate,
          numChannels: 1,
        ),
      );

      _isRecording = true;
      _setState(RecognitionState.listening);
      _clearCurrentText();

      debugPrint('🎤 开始录音流');

      _audioSubscription = recordStream.listen(
        _processAudioData,
        onError: (Object error) {
          debugPrint('❌ 音频流错误: $error');
          _setError('音频流错误: $error');
        },
        onDone: () {
          debugPrint('🔄 音频流结束');
        },
      );

      _startAudioProcessing();

      return true;
    } catch (e) {
      _releaseStream();
      _setError('开始录音失败: $e');
      _setState(RecognitionState.error);
      return false;
    }
  }

  /// Stops capture, finalizes any pending text and returns to `idle`.
  ///
  /// Does nothing when no recording is active. Failures are reported through
  /// [errorMessage]; this method does not throw.
  Future<void> stopRecognition() async {
    if (!_isRecording) return;

    try {
      _setState(RecognitionState.processing);

      try {
        await _recorder.stop();
      } finally {
        // Always tear down the session, even if the recorder failed to stop,
        // so the service cannot get stuck in a "recording" state.
        _isRecording = false;
        _decodeTimer?.cancel();
        _decodeTimer = null;
        await _audioSubscription?.cancel();
        _audioSubscription = null;
      }

      // dispose() may have run while the awaits above were pending.
      if (_isDisposed) return;

      debugPrint('🔄 录音结束,获取最终识别结果...');

      final recognizer = _recognizer;
      final stream = _stream;
      if (recognizer != null && stream != null) {
        try {
          // Decode whatever audio arrived since the last timer tick.
          while (recognizer.isReady(stream)) {
            recognizer.decode(stream);
          }

          final result = recognizer.getResult(stream);

          if (result.text.isNotEmpty) {
            debugPrint('✅ 识别成功: ${result.text}');
            _appendToAccumulatedText(result.text);
            _addToHistory(result.text, 1.0);
          } else {
            // Empty results are not appended to the accumulated text.
            debugPrint('⚠️ 未识别到语音内容');
          }
        } catch (e) {
          debugPrint('❌ 获取识别结果失败: $e');
          _setError('识别失败: $e');
        } finally {
          // A fresh stream is created on the next start.
          _releaseStream();
        }
      } else {
        debugPrint('❌ 识别器未初始化');
        _setError('识别器未初始化');
      }

      _setState(RecognitionState.idle);
    } catch (e) {
      _setError('停止录音失败: $e');
      _setState(RecognitionState.error);
    }
  }

  /// Starts the periodic timer that decodes buffered audio while recording.
  void _startAudioProcessing() {
    _decodeTimer?.cancel();
    _decodeTimer = null;

    if (_recognizer == null || _stream == null) return;

    _decodeTimer = Timer.periodic(_decodeInterval, (timer) {
      final recognizer = _recognizer;
      final stream = _stream;
      if (!_isRecording || recognizer == null || stream == null) {
        timer.cancel();
        return;
      }

      try {
        // Drain every ready chunk; decoding only one per tick would let the
        // recognizer fall behind the incoming audio.
        while (recognizer.isReady(stream)) {
          recognizer.decode(stream);
        }

        final result = recognizer.getResult(stream);

        if (result.text.isNotEmpty && result.text != _currentText) {
          // _setCurrentText already notifies listeners.
          _setCurrentText(result.text);
          debugPrint('🎤 实时识别: ${result.text}');
        }

        if (recognizer.isEndpoint(stream)) {
          debugPrint('🎯 检测到语音端点');

          if (result.text.isNotEmpty) {
            _appendToAccumulatedText(result.text);
            _addToHistory(result.text, 1.0);
            debugPrint('✅ 语音段落结束: ${result.text}');
          }

          // Reset the stream so the next utterance starts clean.
          recognizer.reset(stream);
        }
      } catch (e) {
        debugPrint('❌ 音频处理错误: $e');
      }
    });
  }

  /// Feeds one chunk of recorder output into the recognizer stream.
  void _processAudioData(Uint8List audioData) {
    final stream = _stream;
    if (_recognizer == null || stream == null || !_isRecording) {
      return;
    }

    try {
      final samples = _convertAudioDataToSamples(audioData);

      if (samples.isNotEmpty) {
        stream.acceptWaveform(sampleRate: _sampleRate, samples: samples);

        _updateAudioLevelFromSamples(samples);

        debugPrint('🎵 处理音频数据: ${samples.length} 样本');
      }
    } catch (e) {
      debugPrint('❌ 处理音频数据错误: $e');
    }
  }

  /// Converts little-endian signed 16-bit PCM bytes to floats in `[-1.0, 1.0)`.
  ///
  /// A trailing odd byte, if any, is ignored.
  Float32List _convertAudioDataToSamples(Uint8List audioData) {
    final sampleCount = audioData.length ~/ 2;
    final samples = Float32List(sampleCount);
    // sublistView respects the offset of audioData within its backing buffer.
    final bytes = ByteData.sublistView(audioData);

    for (var i = 0; i < sampleCount; i++) {
      samples[i] = bytes.getInt16(i * 2, Endian.little) / 32768.0;
    }

    return samples;
  }

  /// Appends the RMS level of [samples] to the rolling level history.
  ///
  /// The level is converted to decibels and mapped linearly from
  /// `[-60 dB, 0 dB]` onto `[0.0, 1.0]`.
  void _updateAudioLevelFromSamples(Float32List samples) {
    if (samples.isEmpty) return;

    var sumSquares = 0.0;
    for (final sample in samples) {
      sumSquares += sample * sample;
    }
    final rmsLevel = sqrt(sumSquares / samples.length);

    // Pure silence gives log(0) = -infinity, which the clamp maps to 0.0.
    final dbLevel = 20 * log(rmsLevel) / ln10;
    final normalizedLevel = (dbLevel + 60) / 60;
    final clampedLevel = normalizedLevel.clamp(0.0, 1.0).toDouble();

    if (_audioLevels.length >= _maxAudioLevels) {
      _audioLevels.removeAt(0);
    }
    _audioLevels.add(clampedLevel);

    notifyListeners();
  }

  /// Removes every entry from [history].
  void clearHistory() {
    _history.clear();
    notifyListeners();
  }

  /// Replaces the partial text and notifies listeners.
  void _setCurrentText(String text) {
    _currentText = text;
    notifyListeners();
  }

  /// Appends a finalized segment to [accumulatedText].
  ///
  /// Whitespace-only text is ignored. Segments are joined with a space when
  /// the existing text already ends in sentence punctuation, otherwise with a
  /// comma. Also refreshes [finalText] and clears [currentText].
  void _appendToAccumulatedText(String text) {
    if (text.trim().isEmpty) return;

    if (_accumulatedText.isEmpty) {
      _accumulatedText = text;
    } else {
      final endsSentence = _accumulatedText.endsWith('。') ||
          _accumulatedText.endsWith('!') ||
          _accumulatedText.endsWith('?');
      _accumulatedText += endsSentence ? ' ' : ',';
      _accumulatedText += text;
    }

    _finalText = _accumulatedText;
    _currentText = '';
    notifyListeners();
  }

  /// Clears the partial text and notifies listeners.
  void _clearCurrentText() {
    _currentText = '';
    notifyListeners();
  }

  /// Clears accumulated, final and partial text.
  void clearAccumulatedText() {
    _accumulatedText = '';
    _finalText = '';
    _currentText = '';
    notifyListeners();
  }

  /// Inserts a new result at the front of [history].
  void _addToHistory(String text, double confidence) {
    final result = RecognitionResult(
      text: text,
      confidence: confidence,
      timestamp: DateTime.now(),
    );

    _history.insert(0, result);
    notifyListeners();
  }

  void _setState(RecognitionState state) {
    _state = state;
    notifyListeners();
  }

  void _setError(String error) {
    _errorMessage = error;
    debugPrint('❌ 错误: $error');
    notifyListeners();
  }

  void _clearError() {
    _errorMessage = null;
    notifyListeners();
  }

  /// Frees the native stream, if any.
  void _releaseStream() {
    _stream?.free();
    _stream = null;
  }

  /// Frees the native recognizer, if any.
  void _releaseRecognizer() {
    _recognizer?.free();
    _recognizer = null;
  }

  /// Notifies listeners unless this service has already been disposed.
  ///
  /// Asynchronous work that was in flight when [dispose] ran may still reach
  /// a state setter afterwards; without this guard that would trip the
  /// used-after-dispose assertion in [ChangeNotifier].
  @override
  void notifyListeners() {
    if (_isDisposed) return;
    super.notifyListeners();
  }

  /// Stops all activity and releases recorder and native recognizer resources.
  @override
  void dispose() {
    _isDisposed = true;
    _isRecording = false;
    _isInitialized = false;

    _decodeTimer?.cancel();
    _decodeTimer = null;

    // dispose() cannot await; the teardown futures are intentionally detached.
    unawaited(_audioSubscription?.cancel());
    _audioSubscription = null;
    unawaited(_recorder.dispose());

    _releaseStream();
    _releaseRecognizer();

    super.dispose();
  }
}