518 lines
14 KiB
Dart
518 lines
14 KiB
Dart
import 'dart:async';
|
||
import 'dart:io';
|
||
import 'dart:math';
|
||
import 'package:flutter/foundation.dart';
|
||
import 'package:flutter/services.dart';
|
||
import 'package:path_provider/path_provider.dart';
|
||
import 'package:record/record.dart';
|
||
import 'package:sherpa_onnx/sherpa_onnx.dart';
|
||
|
||
/// Lifecycle states reported by the speech recognition service.
enum RecognitionState {
  /// Nothing is in progress.
  idle,

  /// A transition (initializing, starting or stopping) is in progress.
  processing,

  /// The microphone stream is open and audio is being recognized.
  listening,

  /// The most recent operation failed.
  error,
}
|
||
|
||
/// A single finalized recognition result.
class RecognitionResult {
  /// Creates a result from its recognized [text], [confidence] score and
  /// the [timestamp] at which it was produced.
  RecognitionResult({
    required this.text,
    required this.confidence,
    required this.timestamp,
  });

  /// The recognized text.
  final String text;

  /// Confidence score supplied by whoever created this result.
  final double confidence;

  /// When this result was recorded.
  final DateTime timestamp;

  /// Serializes this result to a JSON-compatible map.
  ///
  /// [timestamp] is encoded as an ISO 8601 string.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'text': text,
      'confidence': confidence,
      'timestamp': timestamp.toIso8601String(),
    };
  }
}
|
||
|
||
/// 语音识别服务类
|
||
class SpeechRecognitionService extends ChangeNotifier {
|
||
// ---- Sherpa-ONNX handles ----

/// Streaming recognizer; `null` until [initialize] has created it.
OnlineRecognizer? _recognizer;

/// Stream of the current recognition session, if one is active.
OnlineStream? _stream;

// ---- Recording ----

/// Microphone recorder that feeds PCM audio to the recognizer.
final AudioRecorder _recorder = AudioRecorder();

/// Whether the recorder stream is currently open.
bool _isRecording = false;

/// Subscription to the recorder's audio stream while recording.
StreamSubscription? _audioSubscription;

/// Recent normalized audio levels, oldest first.
final List<double> _audioLevels = [];

// ---- Recognition state ----

/// Current lifecycle state.
RecognitionState _state = RecognitionState.idle;

/// Partial text of the utterance that is still being recognized.
String _currentText = '';

/// Mirror of [_accumulatedText], refreshed whenever text is appended.
String _finalText = '';

/// All finalized text collected so far.
String _accumulatedText = '';

/// Confidence exposed through [confidence]; it is `final`, so it is
/// always `0.0`.
final double _confidence = 0.0;

/// Finalized results, newest first.
final List<RecognitionResult> _history = [];

// ---- Errors and initialization ----

/// Message of the most recent error, or `null` when there is none.
String? _errorMessage;

/// Whether [initialize] has completed successfully.
bool _isInitialized = false;

// ---- Read-only accessors ----

/// Current lifecycle state.
RecognitionState get state => _state;

/// Partial text of the utterance in progress.
String get currentText => _currentText;

/// Finalized text; kept equal to [accumulatedText].
String get finalText => _finalText;

/// All finalized text collected so far.
String get accumulatedText => _accumulatedText;

/// Confidence value; always `0.0` in this implementation.
double get confidence => _confidence;

/// Unmodifiable snapshot of the recognition history, newest first.
List<RecognitionResult> get history => List.unmodifiable(_history);

/// Unmodifiable snapshot of the recent audio levels.
List<double> get audioLevels => List.unmodifiable(_audioLevels);

/// Message of the most recent error, or `null`.
String? get errorMessage => _errorMessage;

/// Whether audio is currently being recorded.
bool get isRecording => _isRecording;

/// Whether the recognizer has been created successfully.
bool get isInitialized => _isInitialized;
|
||
|
||
/// Prepares the model files and creates the Sherpa-ONNX recognizer.
///
/// [modelPath] is the asset directory holding the int8 transducer model
/// files and `tokens.txt`. [tokensPath] is currently unused: the tokens
/// file is always read from the prepared model directory. [sampleRate] is
/// only logged here; recording and decoding elsewhere in this class use a
/// fixed 16000 Hz.
///
/// Returns `true` on success. On failure the error message is recorded,
/// the state becomes [RecognitionState.error] and `false` is returned.
Future<bool> initialize({
  String modelPath =
      'assets/models/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23',
  String tokensPath = '',
  int sampleRate = 16000,
}) async {
  try {
    _setState(RecognitionState.processing);

    // Copy the bundled model files to the file system if necessary.
    final modelDir = await _prepareModelFiles(modelPath);

    // The int8-quantized model files are used for better performance.
    final encoderPath = '$modelDir/encoder-epoch-99-avg-1.int8.onnx';
    final decoderPath = '$modelDir/decoder-epoch-99-avg-1.int8.onnx';
    final joinerPath = '$modelDir/joiner-epoch-99-avg-1.int8.onnx';
    final tokensFilePath = '$modelDir/tokens.txt';

    // Every file must be present before the recognizer is created.
    for (final path in [
      encoderPath,
      decoderPath,
      joinerPath,
      tokensFilePath,
    ]) {
      if (!await File(path).exists()) {
        throw Exception('模型文件不完整');
      }
    }

    debugPrint('🚀 准备创建 OnlineRecognizer...');
    debugPrint('📁 编码器路径: $encoderPath');
    debugPrint('📁 解码器路径: $decoderPath');
    debugPrint('📁 连接器路径: $joinerPath');
    debugPrint('📁 词表路径: $tokensFilePath');

    try {
      // sherpa_onnx does not load its native library automatically: the
      // bindings must be initialized before any recognizer is created.
      // NOTE(review): repeated calls are expected to be harmless — confirm
      // against the installed sherpa_onnx version.
      initBindings();

      final config = OnlineRecognizerConfig(
        model: OnlineModelConfig(
          transducer: OnlineTransducerModelConfig(
            encoder: encoderPath,
            decoder: decoderPath,
            joiner: joinerPath,
          ),
          tokens: tokensFilePath,
        ),
      );

      _recognizer = OnlineRecognizer(config);
      _isInitialized = true;

      debugPrint('✅ Sherpa-ONNX 识别器创建成功');
    } catch (e) {
      debugPrint('❌ 创建识别器失败: $e');
      throw Exception('Sherpa-ONNX 识别器初始化失败: $e');
    }

    debugPrint('✅ 语音识别服务初始化成功');
    debugPrint('📁 模型目录: $modelDir');
    debugPrint('🎤 采样率: ${sampleRate}Hz');

    _setState(RecognitionState.idle);
    _clearError();

    return true;
  } catch (e) {
    _setError('初始化失败: $e');
    _setState(RecognitionState.error);
    _isInitialized = false;
    return false;
  }
}
|
||
|
||
/// Copies the bundled model files under [assetPath] into the application
/// documents directory and returns the path of the local model directory.
///
/// The local directory is named after the last segment of [assetPath], so
/// different models never share (and silently reuse) each other's files.
/// Files that already exist locally are not copied again.
///
/// Throws an [Exception] when one of the files cannot be copied.
Future<String> _prepareModelFiles(String assetPath) async {
  final appDir = await getApplicationDocumentsDirectory();

  final segments = assetPath.split('/').where((s) => s.isNotEmpty);
  final modelName = segments.isEmpty
      ? 'sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23'
      : segments.last;
  final modelDir = Directory('${appDir.path}/models/$modelName');

  if (!await modelDir.exists()) {
    await modelDir.create(recursive: true);
  }

  // Files required by the Chinese ASR model (int8-quantized variants).
  const files = [
    'encoder-epoch-99-avg-1.int8.onnx',
    'decoder-epoch-99-avg-1.int8.onnx',
    'joiner-epoch-99-avg-1.int8.onnx',
    'tokens.txt',
  ];

  debugPrint('开始复制模型文件到: ${modelDir.path}');

  for (final file in files) {
    final assetFile = '$assetPath/$file';
    final targetFile = File('${modelDir.path}/$file');

    if (await targetFile.exists()) {
      debugPrint('⏭️ 文件已存在,跳过: $file');
      continue;
    }

    // Write to a temporary file and rename it afterwards, so that an
    // interrupted copy never leaves a truncated file that a later run
    // would mistake for a complete one.
    final partialFile = File('${targetFile.path}.part');
    try {
      debugPrint('复制文件: $assetFile -> ${targetFile.path}');
      final data = await rootBundle.load(assetFile);
      // Respect the view's offset and length: the ByteData may cover only
      // part of its underlying buffer.
      final bytes =
          data.buffer.asUint8List(data.offsetInBytes, data.lengthInBytes);
      await partialFile.writeAsBytes(bytes, flush: true);
      await partialFile.rename(targetFile.path);
      debugPrint('✅ 复制成功: $file');
    } catch (e) {
      debugPrint('❌ 无法复制模型文件 $file: $e');
      try {
        if (await partialFile.exists()) {
          await partialFile.delete();
        }
      } on FileSystemException catch (_) {
        // Best-effort cleanup; the copy failure below is what matters.
      }
      throw Exception('模型文件复制失败: $file');
    }
  }

  debugPrint('模型文件准备完成,返回路径: ${modelDir.path}');
  return modelDir.path;
}
|
||
|
||
/// Starts recording from the microphone and feeding audio to the
/// recognizer.
///
/// Returns `true` when recording is running (including when it was already
/// running). Returns `false` when the service is not initialized, when
/// microphone permission is missing, or when recording fails to start; in
/// the last two cases the state becomes [RecognitionState.error].
Future<bool> startRecognition() async {
  if (!_isInitialized) {
    _setError('请先初始化语音识别服务');
    return false;
  }

  if (_isRecording) {
    debugPrint('已经在录音中');
    return true;
  }

  try {
    _setState(RecognitionState.processing);

    // Check the permission before creating any native stream, so that a
    // denial neither leaks a stream nor leaves the state at `processing`.
    if (!await _recorder.hasPermission()) {
      _setError('没有录音权限');
      _setState(RecognitionState.error);
      return false;
    }

    // Release a stream left over from a previous session, if any.
    _stream?.free();
    _stream = null;

    final recognizer = _recognizer;
    if (recognizer != null) {
      _stream = recognizer.createStream();
      debugPrint('✅ 创建识别流成功');
    } else {
      // TODO: remove this branch once the real API is integrated.
      debugPrint('⚠️ 等待真实 API 集成');
    }

    // Raw 16-bit mono PCM at 16 kHz, as fed to the recognizer stream.
    final recordStream = await _recorder.startStream(
      const RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: 16000,
        numChannels: 1,
      ),
    );

    _isRecording = true;
    _setState(RecognitionState.listening);
    _clearCurrentText();

    debugPrint('🎤 开始录音流');

    // Forward every audio chunk to the recognizer as it arrives.
    _audioSubscription = recordStream.listen(
      _processAudioData,
      onError: (error) {
        debugPrint('❌ 音频流错误: $error');
        _setError('音频流错误: $error');
      },
      onDone: () {
        debugPrint('🔄 音频流结束');
      },
    );

    // Periodically decode the buffered audio.
    _startAudioProcessing();

    return true;
  } catch (e) {
    // If recording never started, the stream created above is useless.
    if (!_isRecording) {
      _stream?.free();
      _stream = null;
    }
    _setError('开始录音失败: $e');
    _setState(RecognitionState.error);
    return false;
  }
}
|
||
|
||
/// Stops recording and finalizes the current recognition session.
///
/// Audio that is still buffered is decoded before the final result is
/// read. A non-empty final result is appended to the accumulated text and
/// to the history. The session's native stream is always released.
///
/// Does nothing when no recording is in progress. Failures are reported
/// through the error message and [RecognitionState.error]; nothing is
/// thrown.
Future<void> stopRecognition() async {
  if (!_isRecording) return;

  try {
    _setState(RecognitionState.processing);

    // Stop the recorder first, then stop listening to its stream.
    await _recorder.stop();
    _isRecording = false;

    await _audioSubscription?.cancel();
    _audioSubscription = null;

    debugPrint('🔄 录音结束,获取最终识别结果...');

    final recognizer = _recognizer;
    final stream = _stream;
    if (recognizer != null && stream != null) {
      try {
        // Flush the tail of the audio: signal the end of input and decode
        // everything still buffered before reading the result.
        stream.inputFinished();
        while (recognizer.isReady(stream)) {
          recognizer.decode(stream);
        }

        final result = recognizer.getResult(stream);
        if (result.text.isNotEmpty) {
          debugPrint('✅ 识别成功: ${result.text}');
          _appendToAccumulatedText(result.text);
          _addToHistory(result.text, 1.0);
        } else {
          // Empty results are not appended to the accumulated text.
          debugPrint('⚠️ 未识别到语音内容');
        }
      } catch (e) {
        debugPrint('❌ 获取识别结果失败: $e');
        _setError('识别失败: $e');
      } finally {
        // The next session creates a new stream; release this one even
        // when reading the result failed.
        _stream = null;
        stream.free();
      }
    } else {
      debugPrint('❌ 识别器未初始化');
      _setError('识别器未初始化');
    }

    _setState(RecognitionState.idle);
  } catch (e) {
    _setError('停止录音失败: $e');
    _setState(RecognitionState.error);
  }
}
|
||
|
||
/// Starts a periodic timer that decodes buffered audio and publishes
/// recognition results for the current session.
///
/// Every 200 ms the timer decodes all audio that is ready, publishes the
/// partial text when it changed and, when an endpoint is detected, appends
/// the finished segment to the accumulated text and history and resets the
/// stream for the next segment.
///
/// The timer cancels itself once recording stops or the session's stream
/// is replaced. Does nothing when there is no recognizer or stream.
void _startAudioProcessing() {
  final recognizer = _recognizer;
  final stream = _stream;
  if (recognizer == null || stream == null) return;

  Timer.periodic(const Duration(milliseconds: 200), (timer) {
    // Stop when recording ended, or when a new session replaced the stream
    // this timer was created for (quick stop/start).
    if (!_isRecording || !identical(_stream, stream)) {
      timer.cancel();
      return;
    }

    try {
      // Drain everything that is ready; decoding a single chunk per tick
      // would let the backlog grow.
      while (recognizer.isReady(stream)) {
        recognizer.decode(stream);
      }

      final result = recognizer.getResult(stream);

      if (result.text.isNotEmpty && result.text != _currentText) {
        // _setCurrentText already notifies listeners.
        _setCurrentText(result.text);
        debugPrint('🎤 实时识别: ${result.text}');
      }

      // Checked on every tick, independently of whether new audio was
      // decoded.
      if (recognizer.isEndpoint(stream)) {
        debugPrint('🎯 检测到语音端点');

        if (result.text.isNotEmpty) {
          _appendToAccumulatedText(result.text);
          _addToHistory(result.text, 1.0);
          debugPrint('✅ 语音段落结束: ${result.text}');
        }

        // Reset the stream to get ready for the next segment.
        recognizer.reset(stream);
      }
    } catch (e) {
      debugPrint('❌ 音频处理错误: $e');
    }
  });
}
|
||
|
||
/// Feeds one chunk of recorded audio to the recognizer stream.
///
/// [audioData] is converted to float samples, passed to the stream at
/// 16000 Hz and used to update the audio level history. Chunks are ignored
/// when there is no recognizer or stream, or when not recording. Errors
/// are logged and not rethrown.
void _processAudioData(Uint8List audioData) {
  final canProcess = _recognizer != null && _stream != null && _isRecording;
  if (!canProcess) return;

  try {
    final pcm = _convertAudioDataToSamples(audioData);
    if (pcm.isEmpty) return;

    // Hand the samples to the recognizer.
    _stream!.acceptWaveform(sampleRate: 16000, samples: pcm);

    // Keep the level meter in sync with the incoming audio.
    _updateAudioLevelFromSamples(pcm);

    debugPrint('🎵 处理音频数据: ${pcm.length} 样本');
  } catch (e) {
    debugPrint('❌ 处理音频数据错误: $e');
  }
}
|
||
|
||
/// Converts 16-bit little-endian PCM bytes to float samples.
///
/// Every pair of bytes in [audioData] becomes one sample scaled by
/// 1/32768, giving values in `[-1.0, 1.0)`. A trailing odd byte is
/// ignored.
Float32List _convertAudioDataToSamples(Uint8List audioData) {
  final frameCount = audioData.length ~/ 2;
  final pcm = Float32List(frameCount);

  for (var frame = 0, offset = 0; frame < frameCount; frame++, offset += 2) {
    // Low byte first (little-endian), then reinterpret as signed 16-bit.
    final raw = audioData[offset] | (audioData[offset + 1] << 8);
    pcm[frame] = raw.toSigned(16) / 32768.0;
  }

  return pcm;
}
|
||
|
||
/// Appends the level of [samples] to the audio level history.
///
/// The level is the RMS of the samples expressed in decibels, mapped from
/// the assumed range -60 dB..0 dB onto `[0.0, 1.0]`. At most 50 levels are
/// kept; the oldest one is dropped first. Listeners are notified. Does
/// nothing when [samples] is empty.
void _updateAudioLevelFromSamples(Float32List samples) {
  if (samples.isEmpty) return;

  // Root mean square of the chunk.
  final energy = samples.fold<double>(0.0, (acc, s) => acc + s * s);
  final rms = sqrt(energy / samples.length);

  // Decibels, then normalized and limited to [0.0, 1.0].
  final decibels = 20 * log(rms) / ln10;
  final level = ((decibels + 60) / 60).clamp(0.0, 1.0);

  // Keep the history bounded before adding the new level.
  if (_audioLevels.length >= 50) {
    _audioLevels.removeAt(0);
  }
  _audioLevels.add(level);

  notifyListeners();
}
|
||
|
||
/// Removes every entry from the recognition history and notifies
/// listeners.
void clearHistory() {
  _history.clear();
  notifyListeners();
}
|
||
|
||
/// Replaces the partial text of the utterance in progress with [text] and
/// notifies listeners.
void _setCurrentText(String text) {
  _currentText = text;
  notifyListeners();
}
|
||
|
||
/// Appends [text] to the accumulated text and notifies listeners.
///
/// When text has already been accumulated, a separator is inserted first:
/// a space if the accumulated text ends with one of the sentence
/// terminators listed below, a comma otherwise. The final text is updated
/// to match and the partial text is cleared. Blank [text] is ignored.
void _appendToAccumulatedText(String text) {
  if (text.trim().isEmpty) return;

  if (_accumulatedText.isNotEmpty) {
    const sentenceEnders = ['。', '!', '?'];
    final endsSentence = sentenceEnders.any(_accumulatedText.endsWith);
    _accumulatedText += endsSentence ? ' ' : ',';
  }
  _accumulatedText += text;

  // The final text always mirrors the accumulated text.
  _finalText = _accumulatedText;
  // The segment is finished, so there is no partial text any more.
  _currentText = '';
  notifyListeners();
}
|
||
|
||
/// Clears the partial text of the utterance in progress and notifies
/// listeners.
void _clearCurrentText() {
  _currentText = '';
  notifyListeners();
}
|
||
|
||
/// Clears the accumulated, final and partial text and notifies listeners.
void clearAccumulatedText() {
  _accumulatedText = _finalText = _currentText = '';
  notifyListeners();
}
|
||
|
||
/// Records [text] with [confidence] as the newest history entry, stamped
/// with the current time, and notifies listeners.
void _addToHistory(String text, double confidence) {
  // Newest entries go to the front of the list.
  _history.insert(
    0,
    RecognitionResult(
      text: text,
      confidence: confidence,
      timestamp: DateTime.now(),
    ),
  );
  notifyListeners();
}
|
||
|
||
/// Switches the service to [state] and notifies listeners.
void _setState(RecognitionState state) {
  _state = state;
  notifyListeners();
}
|
||
|
||
/// Stores [error] as the current error message, logs it and notifies
/// listeners.
void _setError(String error) {
  _errorMessage = error;
  debugPrint('❌ 错误: $error');
  notifyListeners();
}
|
||
|
||
/// Clears the current error message and notifies listeners.
void _clearError() {
  _errorMessage = null;
  notifyListeners();
}
|
||
|
||
/// Releases the recorder, the audio subscription and the native
/// recognizer resources.
///
/// Teardown is done without going through [stopRecognition]: that method
/// is asynchronous and would notify listeners after this notifier has
/// been disposed.
@override
void dispose() {
  final wasRecording = _isRecording;

  // Mark recording as stopped first: the periodic decode timer and the
  // audio callback both check this flag and stop touching the stream.
  _isRecording = false;

  unawaited(_audioSubscription?.cancel());
  _audioSubscription = null;

  // Stop (if needed) and dispose the recorder in the background; nothing
  // in this chain notifies listeners.
  unawaited(() async {
    try {
      if (wasRecording) {
        await _recorder.stop();
      }
    } catch (e) {
      debugPrint('❌ 停止录音失败: $e');
    } finally {
      await _recorder.dispose();
    }
  }());

  // Release native resources: the stream first, then its recognizer.
  _stream?.free();
  _stream = null;
  _recognizer?.free();
  _recognizer = null;
  _isInitialized = false;

  super.dispose();
}
|
||
}
|