865 lines
28 KiB
Dart
865 lines
28 KiB
Dart
import 'dart:async';
|
||
import 'dart:io';
|
||
import 'package:flutter/foundation.dart';
|
||
import 'package:flutter/services.dart';
|
||
import 'package:path_provider/path_provider.dart';
|
||
import 'package:sherpa_onnx/sherpa_onnx.dart';
|
||
import 'package:permission_handler/permission_handler.dart';
|
||
import 'package:record/record.dart';
|
||
import 'models/speech_recognition_result.dart';
|
||
import 'models/speech_recognition_error.dart';
|
||
import 'interfaces/speech_recognition_service.dart';
|
||
|
||
/// Recognition-loop speed presets.
///
/// Each value fixes the polling interval of the periodic recognition loop;
/// shorter intervals give lower latency at higher CPU cost.
enum RecognitionSpeed {
  /// Ultra-fast mode — 50 ms interval; best real-time experience, suited to demos.
  ultraFast(50, '超快速'),

  /// Fast mode — 100 ms interval; balances performance and experience.
  fast(100, '快速'),

  /// Standard mode — 150 ms interval; standard experience.
  normal(150, '标准'),

  /// Battery-saver mode — 200 ms interval; reduces CPU usage.
  battery(200, '省电');

  const RecognitionSpeed(this.milliseconds, this.description);

  /// Recognition loop interval, in milliseconds.
  final int milliseconds;

  /// Human-readable description of the mode (Chinese UI label).
  final String description;
}
|
||
|
||
/// Audio sample-rate presets.
enum SampleRate {
  /// 8 kHz — telephone quality; small data but lower quality.
  low(8000, '8kHz (电话质量)'),

  /// 16 kHz — standard for speech recognition; balances quality and
  /// performance (recommended).
  standard(16000, '16kHz (标准)'),

  /// 22.05 kHz — high-quality speech, suited to clear recordings.
  high(22050, '22kHz (高质量)'),

  /// 44.1 kHz — CD quality; highest quality but larger data.
  ultra(44100, '44kHz (CD质量)'),

  /// 48 kHz — professional recording quality.
  professional(48000, '48kHz (专业级)');

  const SampleRate(this.hz, this.description);

  /// Sample rate, in Hz.
  final int hz;

  /// Human-readable description (Chinese UI label).
  final String description;
}
|
||
|
||
/// Decoding-method configuration for the sherpa-onnx recognizer.
enum DecodingMethod {
  /// Greedy search — fast, but may not produce the optimal result.
  greedySearch('greedy_search', '贪心搜索'),

  /// Modified beam search — more accurate but slightly slower.
  modifiedBeamSearch('modified_beam_search', '束搜索');

  const DecodingMethod(this.value, this.description);

  /// The value string as expected by sherpa-onnx.
  final String value;

  /// Human-readable description (Chinese UI label).
  final String description;
}
|
||
|
||
/// Recognition-quality presets.
///
/// Applied via `setRecognitionQuality`, which maps each preset to an
/// [AdvancedRecognitionConfig] and a [RecognitionSpeed].
enum RecognitionQuality {
  /// Fast mode — prioritizes speed.
  fast('快速模式'),

  /// Balanced mode — trades off speed and quality (recommended).
  balanced('平衡模式'),

  /// High-quality mode — prioritizes accuracy.
  highQuality('高质量模式'),

  /// Custom mode — user-defined parameters are left untouched.
  custom('自定义模式');

  const RecognitionQuality(this.description);

  /// Human-readable description (Chinese UI label).
  final String description;
}
|
||
|
||
/// Advanced recognizer configuration.
///
/// Bundles the sherpa-onnx decoding and endpoint-detection parameters.
/// Use one of the presets ([fast], [balanced], [highQuality]) or construct
/// a custom instance.
class AdvancedRecognitionConfig {
  /// Decoding method passed to the recognizer.
  final DecodingMethod decodingMethod;

  /// Maximum number of active paths for beam search
  /// (only effective with [DecodingMethod.modifiedBeamSearch]).
  final int maxActivePaths;

  /// Whether endpoint detection is enabled.
  final bool enableEndpoint;

  /// Endpoint rule 1: minimum trailing silence, in seconds.
  final double rule1MinTrailingSilence;

  /// Endpoint rule 2: minimum trailing silence, in seconds.
  final double rule2MinTrailingSilence;

  /// Endpoint rule 3: minimum utterance length, in seconds.
  final double rule3MinUtteranceLength;

  /// Feature dimension (typically 80).
  final int featureDim;

  /// Blank penalty (used by CTC models).
  final double blankPenalty;

  const AdvancedRecognitionConfig({
    this.decodingMethod = DecodingMethod.greedySearch,
    this.maxActivePaths = 4,
    this.enableEndpoint = true,
    this.rule1MinTrailingSilence = 2.4,
    this.rule2MinTrailingSilence = 1.2,
    this.rule3MinUtteranceLength = 20.0,
    this.featureDim = 80,
    this.blankPenalty = 0.0,
  });

  /// Fast preset — greedy decoding with shorter endpoint thresholds.
  static const AdvancedRecognitionConfig fast = AdvancedRecognitionConfig(
    decodingMethod: DecodingMethod.greedySearch,
    maxActivePaths: 2,
    enableEndpoint: true,
    rule1MinTrailingSilence: 1.8,
    rule2MinTrailingSilence: 0.8,
    rule3MinUtteranceLength: 15.0,
    featureDim: 80,
    blankPenalty: 0.0,
  );

  /// Balanced preset (recommended) — matches the constructor defaults.
  static const AdvancedRecognitionConfig balanced = AdvancedRecognitionConfig(
    decodingMethod: DecodingMethod.greedySearch,
    maxActivePaths: 4,
    enableEndpoint: true,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20.0,
    featureDim: 80,
    blankPenalty: 0.0,
  );

  /// High-quality preset — beam search with longer endpoint thresholds.
  static const AdvancedRecognitionConfig highQuality =
      AdvancedRecognitionConfig(
    decodingMethod: DecodingMethod.modifiedBeamSearch,
    maxActivePaths: 8,
    enableEndpoint: true,
    rule1MinTrailingSilence: 3.0,
    rule2MinTrailingSilence: 1.5,
    rule3MinUtteranceLength: 25.0,
    featureDim: 80,
    blankPenalty: 0.0,
  );
}
|
||
|
||
/// Recognition lifecycle states.
///
/// NOTE(review): this enum is not referenced elsewhere in this file —
/// confirm whether callers outside this file use it.
enum RecognitionState {
  /// No recognition activity.
  idle,

  /// Busy processing audio.
  processing,

  /// Actively listening to the microphone.
  listening,

  /// An error occurred.
  error,
}
|
||
|
||
/// A single recognition result with its confidence and capture time.
class RecognitionResult {
  /// The recognized text.
  final String text;

  /// Confidence score associated with [text].
  final double confidence;

  /// When this result was produced.
  final DateTime timestamp;

  RecognitionResult({
    required this.text,
    required this.confidence,
    required this.timestamp,
  });

  /// Serializes this result into a JSON-compatible map; the timestamp is
  /// encoded as an ISO-8601 string.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{
      'text': text,
      'confidence': confidence,
      'timestamp': timestamp.toIso8601String(),
    };
  }
}
|
||
|
||
/// Complete speech-recognition implementation backed by `sherpa_onnx`.
///
/// Modeled after the companion TTS project's architecture; provides
/// offline (on-device) streaming speech recognition. Exposed as a
/// singleton via the [YxAsrService] factory constructor.
class YxAsrService implements SpeechRecognitionService {
  static final YxAsrService _instance = YxAsrService._internal();

  /// Returns the singleton instance.
  factory YxAsrService() => _instance;

  YxAsrService._internal();

  // sherpa_onnx components.
  OnlineRecognizer? _recognizer;
  OnlineStream? _stream;

  // Audio capture component.
  final AudioRecorder _audioRecorder = AudioRecorder();

  // State flags.
  bool _isListening = false;
  bool _isStartingRecording = false; // Debounce guard: blocks re-entrant starts.
  bool _isInitialized = false;
  String _currentModelPath = '';
  String _lastRecognizedText = ''; // Last emitted text; suppresses duplicates.

  // Recognition loop speed.
  RecognitionSpeed _recognitionSpeed = RecognitionSpeed.fast;

  // Audio sample rate.
  SampleRate _sampleRate = SampleRate.standard;

  // Advanced recognizer configuration.
  AdvancedRecognitionConfig _advancedConfig =
      AdvancedRecognitionConfig.balanced;

  // Event stream controllers.
  final StreamController<SpeechRecognitionResult> _resultController =
      StreamController<SpeechRecognitionResult>.broadcast();
  final StreamController<SpeechRecognitionError> _errorController =
      StreamController<SpeechRecognitionError>.broadcast();
  final StreamController<bool> _statusController =
      StreamController<bool>.broadcast();

  // Timer and subscriptions.
  Timer? _recognitionTimer;
  StreamSubscription<Uint8List>? _audioSubscription;

  /// Whether speech recognition is available.
  ///
  /// sherpa_onnx is always available as long as model files are present,
  /// so this always returns `true`. (The original wrapped `return true` in a
  /// try/catch whose catch branch was unreachable; it has been removed.)
  Future<bool> isAvailable() async => true;

  /// Requests microphone permission and returns whether it was granted.
  Future<bool> requestPermission() async {
    final status = await Permission.microphone.request();
    print('🔍 [YxAsr] 权限请求结果: $status');
    return status == PermissionStatus.granted;
  }

  /// Whether microphone permission is currently granted.
  Future<bool> hasPermission() async {
    final status = await Permission.microphone.status;
    print('🔍 [YxAsr] 当前权限状态: $status');
    return status == PermissionStatus.granted;
  }

  /// Returns a detailed breakdown of the microphone permission status.
  Future<Map<String, dynamic>> getPermissionStatus() async {
    final status = await Permission.microphone.status;
    return {
      'status': status.toString(),
      'isGranted': status == PermissionStatus.granted,
      'isDenied': status == PermissionStatus.denied,
      'isPermanentlyDenied': status == PermissionStatus.permanentlyDenied,
      'isRestricted': status == PermissionStatus.restricted,
      'canRequest': status != PermissionStatus.permanentlyDenied,
    };
  }

  /// Sets the recognition loop speed.
  ///
  /// [speed] - the speed preset for the polling loop.
  void setRecognitionSpeed(RecognitionSpeed speed) {
    _recognitionSpeed = speed;
    debugPrint(
        '🔧 [YxAsr] 识别速度设置为: ${speed.description} (${speed.milliseconds}ms)');
  }

  /// The current recognition speed.
  RecognitionSpeed get recognitionSpeed => _recognitionSpeed;

  /// Sets the sample rate.
  ///
  /// [sampleRate] - the sample-rate preset.
  /// Note: set before initialization, or re-initialize for it to take effect.
  void setSampleRate(SampleRate sampleRate) {
    _sampleRate = sampleRate;
    debugPrint(
        '🔧 [YxAsr] 采样率设置为: ${sampleRate.description} (${sampleRate.hz}Hz)');
  }

  /// The current sample rate.
  SampleRate get sampleRate => _sampleRate;

  /// Sets the advanced recognizer configuration.
  ///
  /// [config] - advanced configuration; use a preset or a custom instance.
  /// Note: set before initialization, or re-initialize for it to take effect.
  void setAdvancedConfig(AdvancedRecognitionConfig config) {
    _advancedConfig = config;
    debugPrint('🔧 [YxAsr] 高级配置已更新: ${config.decodingMethod.description}');
  }

  /// Applies a recognition-quality preset, adjusting both the advanced
  /// configuration and the loop speed.
  ///
  /// [quality] - the quality preset; [RecognitionQuality.custom] leaves the
  /// current configuration untouched.
  void setRecognitionQuality(RecognitionQuality quality) {
    switch (quality) {
      case RecognitionQuality.fast:
        _advancedConfig = AdvancedRecognitionConfig.fast;
        _recognitionSpeed = RecognitionSpeed.ultraFast;
        break;
      case RecognitionQuality.balanced:
        _advancedConfig = AdvancedRecognitionConfig.balanced;
        _recognitionSpeed = RecognitionSpeed.fast;
        break;
      case RecognitionQuality.highQuality:
        _advancedConfig = AdvancedRecognitionConfig.highQuality;
        _recognitionSpeed = RecognitionSpeed.normal;
        break;
      case RecognitionQuality.custom:
        // Keep the current configuration unchanged.
        break;
    }
    debugPrint('🔧 [YxAsr] 识别质量设置为: ${quality.description}');
  }

  /// The current advanced configuration.
  AdvancedRecognitionConfig get advancedConfig => _advancedConfig;

  /// Initializes the recognizer with the model at [modelPath].
  ///
  /// [modelPath] - model directory, e.g.
  /// 'assets/models/sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30'.
  /// The sample rate comes from [setSampleRate].
  ///
  /// Returns `true` on success; on failure emits a [SpeechRecognitionError]
  /// and returns `false`.
  Future<bool> initializeWithModel(String modelPath) async {
    try {
      print('🔍 [YxAsr] 开始初始化,模型路径: $modelPath');

      // Already initialized with this model: nothing to do.
      if (_isInitialized && _currentModelPath == modelPath) {
        print('✅ [YxAsr] 已经初始化过,直接返回');
        return true;
      }

      // Check microphone permission before doing any heavy work.
      print('🔍 [YxAsr] 检查麦克风权限...');
      final permissionInfo = await getPermissionStatus();
      print('🔍 [YxAsr] 权限详情: $permissionInfo');

      if (!permissionInfo['isGranted']) {
        if (permissionInfo['isPermanentlyDenied']) {
          print('❌ [YxAsr] 麦克风权限被永久拒绝,需要用户手动在设置中开启');
          _sendError(SpeechRecognitionErrorType.permissionDenied,
              '麦克风权限被永久拒绝,请在设置中手动开启麦克风权限', 'PERMISSION_PERMANENTLY_DENIED');
          return false;
        } else if (permissionInfo['canRequest']) {
          print('⚠️ [YxAsr] 麦克风权限未授予,尝试请求权限...');
          final granted = await requestPermission();
          if (!granted) {
            print('❌ [YxAsr] 用户拒绝了麦克风权限');
            _sendError(SpeechRecognitionErrorType.permissionDenied,
                '需要麦克风权限才能进行语音识别,请允许应用访问麦克风', 'PERMISSION_DENIED');
            return false;
          }
        } else {
          print('❌ [YxAsr] 麦克风权限受限');
          _sendError(SpeechRecognitionErrorType.permissionDenied,
              '麦克风权限受限,无法进行语音识别', 'PERMISSION_RESTRICTED');
          return false;
        }
      }
      print('✅ [YxAsr] 麦克风权限检查通过');

      // Release any previously created recognizer resources.
      print('🔍 [YxAsr] 清理之前的资源...');
      await _cleanup();

      // Prepare the model files (copy assets into the documents directory).
      print('🔍 [YxAsr] 准备模型文件...');
      final actualModelPath = await _prepareModelFiles(modelPath);

      // Resolve the individual model file paths.
      final encoderPath = '$actualModelPath/encoder-epoch-99-avg-1.int8.onnx';
      final decoderPath = '$actualModelPath/decoder-epoch-99-avg-1.int8.onnx';
      final joinerPath = '$actualModelPath/joiner-epoch-99-avg-1.int8.onnx';
      final tokensPath = '$actualModelPath/tokens.txt';

      print('🔍 [YxAsr] 模型文件路径:');
      print(' - encoder: $encoderPath');
      print(' - decoder: $decoderPath');
      print(' - joiner: $joinerPath');
      print(' - tokens: $tokensPath');

      // Build the recognizer configuration.
      print('🔍 [YxAsr] 构建识别器配置...');
      debugPrint('🔧 [YxAsr] 使用采样率: ${_sampleRate.description}');
      debugPrint(
          '🔧 [YxAsr] 解码方法: ${_advancedConfig.decodingMethod.description}');
      debugPrint('🔧 [YxAsr] 最大活跃路径: ${_advancedConfig.maxActivePaths}');
      debugPrint(
          '🔧 [YxAsr] 端点检测: ${_advancedConfig.enableEndpoint ? "启用" : "禁用"}');

      final config = OnlineRecognizerConfig(
        feat: FeatureConfig(
          sampleRate: _sampleRate.hz,
          featureDim: _advancedConfig.featureDim,
        ),
        model: OnlineModelConfig(
          transducer: OnlineTransducerModelConfig(
            encoder: encoderPath,
            decoder: decoderPath,
            joiner: joinerPath,
          ),
          tokens: tokensPath,
        ),
        // Decoding parameters from the advanced configuration.
        decodingMethod: _advancedConfig.decodingMethod.value,
        maxActivePaths: _advancedConfig.maxActivePaths,

        // Endpoint-detection parameters from the advanced configuration.
        enableEndpoint: _advancedConfig.enableEndpoint,
        rule1MinTrailingSilence: _advancedConfig.rule1MinTrailingSilence,
        rule2MinTrailingSilence: _advancedConfig.rule2MinTrailingSilence,
        rule3MinUtteranceLength: _advancedConfig.rule3MinUtteranceLength,

        // Remaining advanced parameters.
        blankPenalty: _advancedConfig.blankPenalty,
      );

      // Initialize the sherpa-onnx native bindings.
      print('🔍 [YxAsr] 初始化 sherpa-onnx 绑定...');
      initBindings();

      // Create the online recognizer instance.
      print('🔍 [YxAsr] 创建在线识别器实例...');
      try {
        _recognizer = OnlineRecognizer(config);
        print('🔍 [YxAsr] 在线识别器创建成功');
      } catch (e) {
        print('❌ [YxAsr] 在线识别器创建失败: $e');
        // Fix: `rethrow` preserves the original stack trace; the original
        // `throw e;` replaced it with this frame's trace.
        rethrow;
      }

      _currentModelPath = modelPath;
      _isInitialized = true;

      print('✅ [YxAsr] 初始化成功!');
      return true;
    } catch (e) {
      print('❌ [YxAsr] 初始化失败: $e');
      _sendError(SpeechRecognitionErrorType.service, '初始化识别器失败: $e', null);
      return false;
    }
  }

  /// Starts speech recognition.
  ///
  /// [partialResults] - whether to emit partial (interim) results.
  /// The sample rate comes from [setSampleRate].
  ///
  /// No-op when already listening or while a start is already in flight;
  /// failures are reported via [onError] and reset the listening state.
  Future<void> startListening({
    bool partialResults = true,
  }) async {
    try {
      if (!_isInitialized || _recognizer == null) {
        throw Exception('识别器未初始化,请先调用 initializeWithModel()');
      }

      if (_isListening) {
        debugPrint('⚠️ [YxAsr] 已在录音状态,忽略重复调用');
        return;
      }

      // Debounce: ignore re-entrant start attempts.
      if (_isStartingRecording) {
        debugPrint('⚠️ [YxAsr] 正在启动录音,忽略重复调用');
        return;
      }

      _isStartingRecording = true;

      try {
        // Create the recognition stream first.
        _stream = _recognizer!.createStream();
        debugPrint('🔧 [YxAsr] 音频流已创建: ${_stream != null}');

        // Defensive check that stream creation succeeded.
        if (_stream == null) {
          throw Exception('音频流创建失败');
        }

        // Give the native side a moment to finish stream creation.
        await Future.delayed(const Duration(milliseconds: 100));

        // Re-verify the stream state after the delay.
        if (_stream == null) {
          throw Exception('音频流在等待后变为null');
        }

        // Start capturing microphone audio.
        await _startAudioRecording(_sampleRate.hz);

        _isListening = true;
        _lastRecognizedText = ''; // Reset duplicate suppression.
        _statusController.add(true);

        // Kick off the periodic decoding loop.
        _startRecognitionLoop(partialResults);

        debugPrint('✅ [YxAsr] 录音启动成功');
      } finally {
        _isStartingRecording = false;
      }
    } catch (e) {
      _sendError(SpeechRecognitionErrorType.service, '开始识别失败: $e', null);
      // Reset state after a failed start.
      // NOTE(review): any created OnlineStream is dropped here without an
      // explicit native free — confirm whether sherpa_onnx requires one.
      _stream = null;
      _isListening = false;
      _statusController.add(false);
    }
  }

  /// Stops speech recognition.
  ///
  /// Waits (up to ~1 s) for an in-flight start to finish, then tears down the
  /// timer, the audio capture, and the recognition stream.
  Future<void> stopListening() async {
    if (!_isListening) {
      debugPrint('🛑 [YxAsr] 停止识别被忽略: 未在录音状态');
      return;
    }

    // Debounce: if a start is in flight, wait for it to finish first.
    if (_isStartingRecording) {
      debugPrint('⚠️ [YxAsr] 正在启动录音,等待启动完成后再停止');
      // Wait for the start to complete, with a timeout to avoid hanging.
      int waitCount = 0;
      while (_isStartingRecording && waitCount < 20) {
        // At most ~1 second (20 × 50 ms).
        await Future.delayed(const Duration(milliseconds: 50));
        waitCount++;
      }
      if (waitCount >= 20) {
        debugPrint('⚠️ [YxAsr] 等待启动完成超时,强制停止');
        _isStartingRecording = false;
      }
    }

    if (_stream == null) {
      debugPrint('⚠️ [YxAsr] 停止识别: 音频流为null,但仍在录音状态');
    }

    try {
      debugPrint('🛑 [YxAsr] 开始停止识别...');

      // Flip state first to avoid race conditions (mirrors the TTS project).
      _isListening = false;
      _statusController.add(false);

      // Stop the periodic decoding loop.
      _recognitionTimer?.cancel();
      _recognitionTimer = null;

      // Stop audio capture.
      await _stopAudioRecording();

      // Drop the stream so the next session starts fresh.
      if (_stream != null) {
        _stream = null;
      }

      debugPrint('✅ [YxAsr] 识别已停止');
    } catch (e) {
      debugPrint('❌ [YxAsr] 停止识别失败: $e');
      _isListening = false;
      _statusController.add(false);
      _sendError(SpeechRecognitionErrorType.service, '停止识别失败: $e', null);
    }
  }

  /// Cancels speech recognition immediately.
  Future<void> cancel() async {
    await _stopAudioRecording();
    _recognitionTimer?.cancel();
    _recognitionTimer = null;
    _isListening = false;
    _statusController.add(false);
  }

  /// Whether the service is currently listening.
  bool get isListening => _isListening;

  /// Stream of recognition results.
  Stream<SpeechRecognitionResult> get onResult => _resultController.stream;

  /// Stream of recognition errors.
  Stream<SpeechRecognitionError> get onError => _errorController.stream;

  /// Stream of listening-status changes.
  Stream<bool> get onListeningStatusChanged => _statusController.stream;

  /// Interface entry point for initialization.
  ///
  /// Reads `config['modelPath']` (defaults to 'assets/models') and delegates
  /// to [initializeWithModel].
  @override
  Future<bool> initialize(Map<String, dynamic> config) async {
    print('🔍 [YxAsr] initialize() 被调用,config: $config');

    if (!await isAvailable()) {
      print('❌ [YxAsr] 服务不可用');
      return false;
    }

    // Use the model path from the config, or the project default.
    final modelPath = config['modelPath'] as String? ?? 'assets/models';
    print('🔍 [YxAsr] 使用模型路径: $modelPath');

    return await initializeWithModel(modelPath);
  }

  /// Convenience initializer using the bundled default model.
  Future<bool> initializeWithDefaultModel([String? modelPath]) async {
    // Fall back to the in-project model files when no path is given.
    final defaultPath = modelPath ?? 'assets/models';
    print('🔍 [YxAsr] initializeWithDefaultModel() 被调用,使用路径: $defaultPath');
    return await initialize({'modelPath': defaultPath});
  }

  /// Starts the microphone capture stream at [sampleRate] Hz and feeds
  /// incoming PCM buffers into the recognition stream.
  Future<void> _startAudioRecording(int sampleRate) async {
    try {
      debugPrint('🎤 [YxAsr] 配置音频录制,采样率: ${sampleRate}Hz');
      // Configure the recorder: 16-bit PCM, mono.
      final config = RecordConfig(
        encoder: AudioEncoder.pcm16bits,
        sampleRate: sampleRate,
        numChannels: 1,
      );

      // Start streaming audio.
      final stream = await _audioRecorder.startStream(config);

      // Listen to incoming audio buffers.
      _audioSubscription = stream.listen(
        (audioData) {
          // Forward audio only when every prerequisite holds.
          if (_stream != null &&
              _recognizer != null &&
              _isListening &&
              !_isStartingRecording) {
            // Convert raw PCM bytes to normalized float samples.
            final samples = _convertToFloat32(audioData);
            debugPrint(
                '🎵 [YxAsr] 接收音频数据: ${audioData.length} 字节, ${samples.length} 样本');
            // Push the samples into the recognizer's stream.
            _stream!.acceptWaveform(sampleRate: sampleRate, samples: samples);
          } else {
            debugPrint(
                '❌ [YxAsr] 音频数据丢弃: stream=${_stream != null}, recognizer=${_recognizer != null}, listening=$_isListening, starting=$_isStartingRecording');
          }
        },
        onError: (error) {
          _sendError(SpeechRecognitionErrorType.audio, '音频录制错误: $error', null);
        },
      );
    } catch (e) {
      _sendError(SpeechRecognitionErrorType.audio, '开始音频录制失败: $e', null);
    }
  }

  /// Stops the microphone capture and cancels the audio subscription.
  Future<void> _stopAudioRecording() async {
    try {
      debugPrint('🛑 [YxAsr] 停止音频录制...');

      // 1. Stop the recorder first (same order as the TTS project).
      if (await _audioRecorder.isRecording()) {
        await _audioRecorder.stop();
        debugPrint('🛑 [YxAsr] 录音器已停止');
      }

      // 2. Then cancel the audio subscription.
      await _audioSubscription?.cancel();
      _audioSubscription = null;
      debugPrint('🛑 [YxAsr] 音频订阅已取消');
    } catch (e) {
      debugPrint('⚠️ [YxAsr] 停止录制时出错: $e');
      // Errors while stopping are deliberately ignored, but logged.
    }
  }

  /// Converts 16-bit little-endian PCM bytes to Float32 samples normalized
  /// to the range [-1.0, 1.0].
  Float32List _convertToFloat32(Uint8List audioData) {
    // Each sample occupies 2 bytes (16-bit PCM).
    final sampleCount = audioData.length ~/ 2;
    final samples = Float32List(sampleCount);

    for (int i = 0; i < sampleCount; i++) {
      // Read a 16-bit little-endian integer.
      final sample16 = (audioData[i * 2 + 1] << 8) | audioData[i * 2];

      // Reinterpret as a signed 16-bit value.
      final signedSample = sample16 > 32767 ? sample16 - 65536 : sample16;

      // Normalize to [-1.0, 1.0].
      samples[i] = signedSample / 32768.0;
    }

    return samples;
  }

  /// Starts the periodic decoding loop at the configured recognition speed.
  ///
  /// [partialResults] - whether interim results are forwarded to [onResult].
  void _startRecognitionLoop(bool partialResults) {
    debugPrint(
        '🔄 [YxAsr] 开始识别循环, partialResults: $partialResults, 速度: ${_recognitionSpeed.description} (${_recognitionSpeed.milliseconds}ms)');
    _recognitionTimer = Timer.periodic(
        Duration(milliseconds: _recognitionSpeed.milliseconds), (timer) {
      // Stop the loop as soon as any prerequisite goes away.
      if (!_isListening || _stream == null || _recognizer == null) {
        debugPrint(
            '🛑 [YxAsr] 识别循环停止: listening=$_isListening, stream=${_stream != null}, recognizer=${_recognizer != null}');
        timer.cancel();
        return;
      }

      try {
        // Check whether the recognizer has pending audio to decode.
        if (_recognizer!.isReady(_stream!)) {
          debugPrint('🔍 [YxAsr] 识别器准备就绪,开始解码');
          // Decode pending audio (the essential step).
          _recognizer!.decode(_stream!);

          // Fetch the current recognition result.
          final result = _recognizer!.getResult(_stream!);
          debugPrint('🔍 [YxAsr] 获取识别结果: "${result.text}"');

          // Emit only non-empty, changed results, and only when partial
          // results are enabled.
          if (result.text.isNotEmpty &&
              partialResults &&
              result.text != _lastRecognizedText) {
            debugPrint('🎤 [YxAsr] 发送实时识别结果: ${result.text}');
            _lastRecognizedText = result.text; // Remember for deduplication.
            _sendResult(
              recognizedWords: result.text,
            );
          } else if (result.text.isNotEmpty &&
              result.text == _lastRecognizedText) {
            debugPrint('🔄 [YxAsr] 跳过重复识别结果: "${result.text}"');
          }

          // Endpoint detection is not acted on here; the user ends the
          // recording manually.
        }
      } catch (e) {
        debugPrint('❌ [YxAsr] 识别过程中出错: $e');
        _sendError(SpeechRecognitionErrorType.service, '识别过程中出错: $e', null);
        timer.cancel();
      }
    });
  }

  /// Pushes a recognition result onto [onResult].
  void _sendResult({
    required String recognizedWords,
  }) {
    debugPrint('📤 [YxAsr] 发送识别结果: "$recognizedWords"');
    final result = SpeechRecognitionResult(
      recognizedWords: recognizedWords,
    );
    _resultController.add(result);
  }

  /// Pushes a [SpeechRecognitionError] onto [onError].
  void _sendError(SpeechRecognitionErrorType errorType, String errorMsg,
      String? errorCode) {
    final error = SpeechRecognitionError(
      errorType: errorType,
      errorMsg: errorMsg,
      errorCode: errorCode,
    );
    _errorController.add(error);
  }

  /// Releases timers, audio capture, and recognizer state.
  Future<void> _cleanup() async {
    await _stopAudioRecording();
    _recognitionTimer?.cancel();
    _recognitionTimer = null;
    // NOTE(review): the native stream/recognizer are dropped without an
    // explicit free — confirm whether sherpa_onnx requires freeing them.
    _stream = null;
    _recognizer = null;
    _isListening = false;
    _isInitialized = false;
  }

  /// Copies the model assets at [assetPath] into the application documents
  /// directory (skipping files already present) and returns the resulting
  /// directory path.
  ///
  /// Throws an [Exception] with diagnostic details when a file cannot be
  /// copied.
  Future<String> _prepareModelFiles(String assetPath) async {
    try {
      // Use the application documents directory (same as tts_test).
      final appDir = await getApplicationDocumentsDirectory();
      final modelDir = Directory('${appDir.path}/models/yx_asr');

      // Ensure the model directory exists.
      if (!await modelDir.exists()) {
        await modelDir.create(recursive: true);
      }

      // Files required by the streaming transducer model.
      final files = [
        'encoder-epoch-99-avg-1.int8.onnx',
        'decoder-epoch-99-avg-1.int8.onnx',
        'joiner-epoch-99-avg-1.int8.onnx',
        'tokens.txt',
      ];

      debugPrint('🔍 [YxAsr] 开始复制模型文件到: ${modelDir.path}');

      // Copy each file that is not already present.
      for (final fileName in files) {
        final assetFile = '$assetPath/$fileName';
        final targetFile = File('${modelDir.path}/$fileName');

        if (!await targetFile.exists()) {
          try {
            debugPrint('🔍 [YxAsr] 复制文件: $assetFile -> ${targetFile.path}');
            final assetData = await rootBundle.load(assetFile);
            // Fix: respect the ByteData's offset/length; a bare
            // buffer.asUint8List() would write the whole backing buffer if
            // the ByteData is a view into a larger one.
            await targetFile.writeAsBytes(assetData.buffer.asUint8List(
                assetData.offsetInBytes, assetData.lengthInBytes));
            debugPrint('✅ [YxAsr] 复制成功: $fileName');
          } catch (e) {
            debugPrint('❌ [YxAsr] 无法复制模型文件 $fileName: $e');
            // Surface a detailed error so missing assets are easy to diagnose.
            throw Exception('模型文件复制失败: $fileName\n'
                '请确保模型文件存在于: $assetFile\n'
                '当前模型路径: $assetPath\n'
                '支持的新模型: sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30');
          }
        } else {
          debugPrint('⏭️ [YxAsr] 文件已存在,跳过: $fileName');
        }
      }

      debugPrint('✅ [YxAsr] 模型文件准备完成: ${modelDir.path}');
      return modelDir.path;
    } catch (e) {
      debugPrint('❌ [YxAsr] 准备模型文件失败: $e');
      throw Exception('模型文件准备失败: $e');
    }
  }

  /// Releases all resources and closes the event streams.
  ///
  /// After this call the singleton can no longer emit events.
  Future<void> dispose() async {
    await _cleanup();
    await _resultController.close();
    await _errorController.close();
    await _statusController.close();
  }
}
|