yx_speech_to_text_flutter/test/speech_recognition_service....

518 lines
14 KiB
Dart
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import 'dart:async';
import 'dart:io';
import 'dart:math';
import 'package:flutter/foundation.dart';
import 'package:flutter/services.dart';
import 'package:path_provider/path_provider.dart';
import 'package:record/record.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart';
/// Lifecycle states published by the speech recognition service.
enum RecognitionState {
  /// Nothing is in progress; this is the service's initial state.
  idle,

  /// A transition is running (initializing, starting, or stopping).
  processing,

  /// The recorder stream is open and audio is being fed to the recognizer.
  listening,

  /// The last operation failed.
  error,
}
/// One recognized piece of text, with its score and the time it was recorded.
class RecognitionResult {
  /// Creates a result; every field is required.
  RecognitionResult({
    required this.text,
    required this.confidence,
    required this.timestamp,
  });

  /// The recognized text.
  final String text;

  /// The score supplied by the caller for [text].
  final double confidence;

  /// The moment this result was created.
  final DateTime timestamp;

  /// Returns a JSON-compatible map with the keys `text`, `confidence` and
  /// `timestamp` (the latter as an ISO-8601 string).
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'text': text,
      'confidence': confidence,
      'timestamp': timestamp.toIso8601String(),
    };
  }
}
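/// Speech recognition service that records microphone audio, feeds it to a
/// Sherpa-ONNX online recognizer, and notifies listeners of state and text
/// changes.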
class SpeechRecognitionService extends ChangeNotifier {
// Sherpa-ONNX objects: the recognizer created by initialize() and the stream
// created for each recording session.
OnlineRecognizer? _recognizer;
OnlineStream? _stream;
// Recording: the audio recorder and whether a recording session is active.
final AudioRecorder _recorder = AudioRecorder();
bool _isRecording = false;
// State exposed to listeners.
RecognitionState _state = RecognitionState.idle;
String _currentText = '';
String _finalText = '';
String _accumulatedText = ''; // All recognized text accumulated so far.
// Declared final, so the reported confidence is always 0.0.
final double _confidence = 0.0;
final List<RecognitionResult> _history = [];
// Audio data: recent normalized level values and the recorder stream
// subscription.
final List<double> _audioLevels = [];
StreamSubscription? _audioSubscription;
// Most recent error message, or null when there is none.
String? _errorMessage;
// Whether initialize() has completed successfully.
bool _isInitialized = false;
// Read-only accessors. The list getters return unmodifiable views.
RecognitionState get state => _state;
String get currentText => _currentText;
String get finalText => _finalText;
String get accumulatedText => _accumulatedText;
double get confidence => _confidence;
List<RecognitionResult> get history => List.unmodifiable(_history);
List<double> get audioLevels => List.unmodifiable(_audioLevels);
String? get errorMessage => _errorMessage;
bool get isRecording => _isRecording;
bool get isInitialized => _isInitialized;
/// Prepares the model files and creates the Sherpa-ONNX recognizer.
///
/// [modelPath] is the asset directory the model files are copied from.
/// [tokensPath] is currently unused; the token table is always read from
/// `tokens.txt` inside the prepared model directory. [sampleRate] is only
/// logged here; the recorder is configured separately in
/// [startRecognition].
///
/// Returns `true` on success. On failure the error message is stored, the
/// state becomes [RecognitionState.error], and `false` is returned; no
/// exception escapes this method.
Future<bool> initialize({
  String modelPath =
      'assets/models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23',
  String tokensPath = '',
  int sampleRate = 16000,
}) async {
  try {
    _setState(RecognitionState.processing);

    // Copy the bundled model into the file system if it is not there yet.
    final modelDir = await _prepareModelFiles(modelPath);

    // The int8-quantized model files are used.
    final encoderPath = '$modelDir/encoder-epoch-99-avg-1.int8.onnx';
    final decoderPath = '$modelDir/decoder-epoch-99-avg-1.int8.onnx';
    final joinerPath = '$modelDir/joiner-epoch-99-avg-1.int8.onnx';
    final tokensFilePath = '$modelDir/tokens.txt';

    final requiredFiles = [
      encoderPath,
      decoderPath,
      joinerPath,
      tokensFilePath,
    ];
    for (final path in requiredFiles) {
      if (!await File(path).exists()) {
        throw Exception('模型文件不完整');
      }
    }

    debugPrint('🚀 准备创建 OnlineRecognizer...');
    debugPrint('📁 编码器路径: $encoderPath');
    debugPrint('📁 解码器路径: $decoderPath');
    debugPrint('📁 连接器路径: $joinerPath');
    debugPrint('📁 词表路径: $tokensFilePath');

    try {
      final config = OnlineRecognizerConfig(
        model: OnlineModelConfig(
          transducer: OnlineTransducerModelConfig(
            encoder: encoderPath,
            decoder: decoderPath,
            joiner: joinerPath,
          ),
          tokens: tokensFilePath,
        ),
      );
      final previous = _recognizer;
      _recognizer = OnlineRecognizer(config);
      _isInitialized = true;

      // On re-initialization the old recognizer holds native memory that is
      // never reclaimed unless it is freed explicitly. Only release it when
      // no recording session can still be using it; a leftover stream
      // belongs to the old recognizer and is released first.
      if (!_isRecording) {
        _stream?.free();
        _stream = null;
        previous?.free();
      }
      debugPrint('✅ Sherpa-ONNX 识别器创建成功');
    } catch (e) {
      debugPrint('❌ 创建识别器失败: $e');
      throw Exception('Sherpa-ONNX 识别器初始化失败: $e');
    }

    debugPrint('✅ 语音识别服务初始化成功');
    debugPrint('📁 模型目录: $modelDir');
    debugPrint('🎤 采样率: ${sampleRate}Hz');
    _setState(RecognitionState.idle);
    _clearError();
    return true;
  } catch (e) {
    // Clear the flag before notifying so listeners never observe the error
    // state together with isInitialized == true.
    _isInitialized = false;
    _setError('初始化失败: $e');
    _setState(RecognitionState.error);
    return false;
  }
}
/// Copies the bundled model files into the application documents directory
/// and returns the path of the directory that contains them.
///
/// [assetPath] is the asset directory the files are loaded from. The target
/// directory name is fixed and does not depend on [assetPath]. Files that
/// already exist in the target directory are not copied again.
///
/// Throws an [Exception] when a file cannot be loaded or written.
Future<String> _prepareModelFiles(String assetPath) async {
  final appDir = await getApplicationDocumentsDirectory();
  final modelDir = Directory(
      '${appDir.path}/models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23');
  if (!await modelDir.exists()) {
    await modelDir.create(recursive: true);
  }

  // Files required by the Chinese ASR model (int8-quantized variants).
  const files = [
    'encoder-epoch-99-avg-1.int8.onnx',
    'decoder-epoch-99-avg-1.int8.onnx',
    'joiner-epoch-99-avg-1.int8.onnx',
    'tokens.txt',
  ];

  debugPrint('开始复制模型文件到: ${modelDir.path}');
  for (final file in files) {
    final assetFile = '$assetPath/$file';
    final targetFile = File('${modelDir.path}/$file');
    if (await targetFile.exists()) {
      debugPrint('⏭️ 文件已存在,跳过: $file');
      continue;
    }
    try {
      debugPrint('复制文件: $assetFile -> ${targetFile.path}');
      final data = await rootBundle.load(assetFile);
      // A ByteData may be a window into a larger buffer, so honour its
      // offset and length instead of dumping the whole backing buffer.
      final bytes =
          data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes);
      await targetFile.writeAsBytes(bytes, flush: true);
      debugPrint('✅ 复制成功: $file');
    } catch (e) {
      debugPrint('❌ 无法复制模型文件 $file: $e');
      // Remove a partially written copy; otherwise the next run would see
      // the file, skip the copy, and load a truncated model.
      try {
        if (await targetFile.exists()) {
          await targetFile.delete();
        }
      } on FileSystemException catch (cleanupError) {
        debugPrint('⚠️ 无法删除不完整的模型文件 $file: $cleanupError');
      }
      throw Exception('模型文件复制失败: $file');
    }
  }
  debugPrint('模型文件准备完成,返回路径: ${modelDir.path}');
  return modelDir.path;
}
/// Starts a recording session and begins streaming audio to the recognizer.
///
/// Returns `true` when recording was started or was already running.
/// Returns `false` when the service is not initialized, microphone
/// permission is missing, or starting the recorder fails; in the latter two
/// cases the state becomes [RecognitionState.error].
Future<bool> startRecognition() async {
  if (!_isInitialized) {
    _setError('请先初始化语音识别服务');
    return false;
  }
  if (_isRecording) {
    debugPrint('已经在录音中');
    return true;
  }
  try {
    _setState(RecognitionState.processing);

    // Check permission before allocating anything, and leave the
    // `processing` state on refusal so the service does not appear busy
    // forever.
    if (!await _recorder.hasPermission()) {
      _setError('没有录音权限');
      _setState(RecognitionState.error);
      return false;
    }

    // Release a stream left behind by an earlier session before replacing
    // it; streams hold native memory.
    _stream?.free();
    final recognizer = _recognizer;
    if (recognizer != null) {
      _stream = recognizer.createStream();
      debugPrint('✅ 创建识别流成功');
    } else {
      // TODO: remove this branch once the real API is integrated.
      _stream = null;
      debugPrint('⚠️ 等待真实 API 集成');
    }

    final recordStream = await _recorder.startStream(
      const RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: 16000,
        numChannels: 1,
      ),
    );
    _isRecording = true;
    _setState(RecognitionState.listening);
    _clearCurrentText();
    debugPrint('🎤 开始录音流');

    // Forward every audio chunk to the recognizer as it arrives.
    _audioSubscription = recordStream.listen(
      _processAudioData,
      onError: (error) {
        debugPrint('❌ 音频流错误: $error');
        _setError('音频流错误: $error');
      },
      onDone: () {
        debugPrint('🔄 音频流结束');
      },
    );

    // Start the periodic decode loop.
    _startAudioProcessing();
    return true;
  } catch (e) {
    // If recording never started, nothing will consume the stream, so free
    // it here instead of leaking it.
    if (!_isRecording) {
      _stream?.free();
      _stream = null;
    }
    _setError('开始录音失败: $e');
    _setState(RecognitionState.error);
    return false;
  }
}
/// Stops the current recording session and publishes the final result.
///
/// Does nothing when no recording is active. After the recorder and the
/// audio subscription are stopped, any audio still buffered in the stream is
/// decoded, a non-empty result is appended to the accumulated text and the
/// history, and the stream is released. Ends in [RecognitionState.idle], or
/// [RecognitionState.error] if stopping the recorder fails.
Future<void> stopRecognition() async {
  if (!_isRecording) return;
  try {
    _setState(RecognitionState.processing);

    await _recorder.stop();
    _isRecording = false;

    await _audioSubscription?.cancel();
    _audioSubscription = null;
    debugPrint('🔄 录音结束,获取最终识别结果...');

    final recognizer = _recognizer;
    final stream = _stream;
    if (recognizer != null && stream != null) {
      try {
        // Decode whatever arrived since the last timer tick so the final
        // result covers all received audio.
        while (recognizer.isReady(stream)) {
          recognizer.decode(stream);
        }
        final result = recognizer.getResult(stream);
        if (result.text.isNotEmpty) {
          debugPrint('✅ 识别成功: ${result.text}');
          _appendToAccumulatedText(result.text);
          _addToHistory(result.text, 1.0);
        } else {
          // Empty results are not appended to the accumulated text.
          debugPrint('⚠️ 未识别到语音内容');
        }
      } catch (e) {
        debugPrint('❌ 获取识别结果失败: $e');
        _setError('识别失败: $e');
      } finally {
        // The stream owns native memory; dropping the reference alone would
        // leak it. Release it on success and on failure.
        _stream = null;
        stream.free();
      }
    } else {
      debugPrint('❌ 识别器未初始化');
      _setError('识别器未初始化');
    }
    _setState(RecognitionState.idle);
  } catch (e) {
    _setError('停止录音失败: $e');
    _setState(RecognitionState.error);
  }
}
/// Starts a 200 ms periodic timer that decodes buffered audio and publishes
/// partial and final results for the current session.
///
/// Does nothing when there is no recognizer or stream. The timer cancels
/// itself once recording stops or the session's recognizer/stream has been
/// replaced.
void _startAudioProcessing() {
  final recognizer = _recognizer;
  final stream = _stream;
  if (recognizer == null || stream == null) return;

  Timer.periodic(const Duration(milliseconds: 200), (timer) {
    // Bind the timer to the session it was created for: after a quick
    // stop/start the old timer must not keep running next to the new one.
    final sessionEnded = !_isRecording ||
        !identical(_stream, stream) ||
        !identical(_recognizer, recognizer);
    if (sessionEnded) {
      timer.cancel();
      return;
    }
    try {
      // Drain everything that is ready; decoding a single chunk per tick
      // lets the recognizer fall behind the incoming audio.
      while (recognizer.isReady(stream)) {
        recognizer.decode(stream);
      }

      final text = recognizer.getResult(stream).text;
      if (text.isNotEmpty && text != _currentText) {
        // _setCurrentText already notifies listeners.
        _setCurrentText(text);
        debugPrint('🎤 实时识别: $text');
      }

      // On an endpoint, commit the segment and reset the stream for the
      // next one.
      if (recognizer.isEndpoint(stream)) {
        debugPrint('🎯 检测到语音端点');
        if (text.isNotEmpty) {
          _appendToAccumulatedText(text);
          _addToHistory(text, 1.0);
          debugPrint('✅ 语音段落结束: $text');
        }
        recognizer.reset(stream);
      }
    } catch (e) {
      debugPrint('❌ 音频处理错误: $e');
    }
  });
}
/// Handles one chunk of recorder output: converts it to float samples, feeds
/// them to the recognition stream, and updates the level meter.
///
/// Chunks are ignored when there is no recognizer, no stream, or no active
/// recording. Errors are logged and not rethrown.
void _processAudioData(Uint8List audioData) {
  final activeStream = _stream;
  if (_recognizer == null || activeStream == null || !_isRecording) {
    return;
  }
  try {
    // 16-bit PCM bytes -> float32 samples.
    final pcm = _convertAudioDataToSamples(audioData);
    if (pcm.isEmpty) return;

    // Hand the samples to the recognizer, then refresh the level display.
    activeStream.acceptWaveform(sampleRate: 16000, samples: pcm);
    _updateAudioLevelFromSamples(pcm);
    debugPrint('🎵 处理音频数据: ${pcm.length} 样本');
  } catch (e) {
    debugPrint('❌ 处理音频数据错误: $e');
  }
}
/// Converts little-endian signed 16-bit PCM bytes into float32 samples.
///
/// Each pair of bytes becomes one sample divided by 32768, so values fall in
/// [-1.0, 1.0). A trailing odd byte is ignored.
Float32List _convertAudioDataToSamples(Uint8List audioData) {
  final frameCount = audioData.length ~/ 2;
  final view = ByteData.sublistView(audioData);
  final output = Float32List(frameCount);
  for (var frame = 0; frame < frameCount; frame++) {
    // getInt16 performs the little-endian read and the sign extension.
    output[frame] = view.getInt16(frame * 2, Endian.little) / 32768.0;
  }
  return output;
}
/// Appends the loudness of [samples] to the level history and notifies
/// listeners.
///
/// The level is the RMS of the samples converted to decibels and mapped from
/// the range -60 dB..0 dB onto 0.0..1.0 (clamped). At most 50 levels are
/// kept; the oldest is dropped first. Empty input is ignored.
void _updateAudioLevelFromSamples(Float32List samples) {
  if (samples.isEmpty) return;

  // Root mean square of the chunk.
  final energy = samples.fold<double>(0.0, (acc, s) => acc + s * s);
  final rms = sqrt(energy / samples.length);

  // Decibels, then normalized assuming a -60 dB to 0 dB window.
  final decibels = 20 * log(rms) / ln10;
  final level = ((decibels + 60) / 60).clamp(0.0, 1.0);

  // Keep a sliding window of the latest 50 values.
  if (_audioLevels.length >= 50) {
    _audioLevels.removeAt(0);
  }
  _audioLevels.add(level);
  notifyListeners();
}
/// Removes every entry from the recognition history and notifies listeners.
void clearHistory() {
_history.clear();
notifyListeners();
}
/// Replaces the current (in-progress) text with [text] and notifies
/// listeners.
void _setCurrentText(String text) {
_currentText = text;
notifyListeners();
}
/// Appends a finished segment to the accumulated transcript.
///
/// Blank [text] is ignored. Afterwards the final text mirrors the
/// accumulated text, the current (in-progress) text is cleared, and
/// listeners are notified.
void _appendToAccumulatedText(String text) {
  if (text.trim().isEmpty) return;

  // NOTE(review): in the reviewed copy these three literals were empty
  // strings, which makes `endsWith` always true and inserts a space before
  // every segment. They are reconstructed here as the full-width sentence
  // terminators the separator logic appears to have been written for —
  // confirm against the original file.
  const sentenceTerminators = ['。', '!', '?'];
  final needsSeparator = _accumulatedText.isNotEmpty &&
      sentenceTerminators.any(_accumulatedText.endsWith);

  _accumulatedText = needsSeparator
      ? '$_accumulatedText $text'
      : '$_accumulatedText$text';

  // Keep the final text in sync and clear the in-progress text.
  _finalText = _accumulatedText;
  _currentText = '';
  notifyListeners();
}
/// Resets the current (in-progress) text to the empty string and notifies
/// listeners.
void _clearCurrentText() {
_currentText = '';
notifyListeners();
}
/// Empties the accumulated, final and current text, then notifies listeners.
void clearAccumulatedText() {
  _accumulatedText = _finalText = _currentText = '';
  notifyListeners();
}
/// Records [text] with [confidence] and the current time as the newest
/// history entry (inserted at index 0) and notifies listeners.
void _addToHistory(String text, double confidence) {
  _history.insert(
    0,
    RecognitionResult(
      text: text,
      confidence: confidence,
      timestamp: DateTime.now(),
    ),
  );
  notifyListeners();
}
/// Stores [state] as the current recognition state and notifies listeners.
void _setState(RecognitionState state) {
_state = state;
notifyListeners();
}
/// Stores [error] as the current error message, logs it, and notifies
/// listeners. The recognition state is not changed here.
void _setError(String error) {
_errorMessage = error;
debugPrint('❌ 错误: $error');
notifyListeners();
}
/// Clears the stored error message and notifies listeners.
void _clearError() {
_errorMessage = null;
notifyListeners();
}
/// Releases the recorder, the audio subscription and the native recognizer
/// resources.
///
/// Teardown is done synchronously instead of through [stopRecognition]:
/// that method is asynchronous and would resume after `super.dispose()`,
/// calling `notifyListeners()` on an already disposed notifier. A final
/// recognition result is therefore not published on dispose.
@override
void dispose() {
  // Clearing the flag makes the decode timer cancel itself on its next tick
  // and makes incoming audio chunks be ignored.
  _isRecording = false;
  _isInitialized = false;

  unawaited(_audioSubscription?.cancel());
  _audioSubscription = null;
  unawaited(_recorder.dispose());

  // Free native objects explicitly; dropping the references alone leaks
  // them. The stream goes first because it was created by the recognizer.
  _stream?.free();
  _stream = null;
  _recognizer?.free();
  _recognizer = null;

  super.dispose();
}
}