"""Speaker diarization helpers built on the pyannote pretrained pipeline.

Importing this module loads ``pyannote/speaker-diarization-3.1`` and applies
the tuned hyper-parameters below; :func:`run` then diarizes a single audio
file and returns one :class:`SpeakerItem` per speaker turn.
"""

import os
import time

from pyannote.audio import Pipeline
from pyannote.audio.pipelines import SpeakerDiarization
from pyannote.core import Annotation
from pyannote.audio.pipelines.utils.hook import ProgressHook
from pydantic import BaseModel

_PIPELINE_NAME = "pyannote/speaker-diarization-3.1"

# SECURITY NOTE(review): this access token was committed in source. It is kept
# only as a fallback so existing deployments keep working; it should be
# revoked and supplied exclusively through the HF_TOKEN environment variable.
_FALLBACK_AUTH_TOKEN = "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"


def _log(message: str) -> None:
    """Print *message* prefixed with the current local timestamp."""
    # The timestamp is computed outside the f-string so the module does not
    # rely on nested same-type quotes, which only parse on Python 3.12+.
    timestamp = time.strftime("%Y/%m/%d %H:%M:%S")
    print(f"{timestamp}=>{message}")


_log("加载预训练模型")
_loaded_pipeline = Pipeline.from_pretrained(
    _PIPELINE_NAME,
    use_auth_token=os.environ.get("HF_TOKEN", _FALLBACK_AUTH_TOKEN),
)
if _loaded_pipeline is None:
    # NOTE(review): from_pretrained appears to return None (rather than raise)
    # when the gated model cannot be downloaded - confirm against the
    # installed pyannote.audio version. Fail with a clear message instead of
    # an AttributeError on the next line.
    raise RuntimeError(
        f"Could not load pipeline {_PIPELINE_NAME!r}; "
        "check the Hugging Face access token and model access conditions."
    )

speaker_diarization: SpeakerDiarization = _loaded_pipeline
speaker_diarization.instantiate(
    {
        "clustering": {
            "method": "centroid",
            "min_cluster_size": 12,
            "threshold": 0.7045654963945799,
        },
        "segmentation": {
            "min_duration_off": 0.33,
        },
    }
)


class SpeakerItem(BaseModel):
    """One speaker turn produced by the diarization pipeline.

    ``start``, ``end`` and ``duration`` are the segment bounds reported by
    pyannote, rounded to three decimals (presumably seconds - confirm against
    the pyannote ``Segment`` documentation).
    """

    speaker: str
    start: float
    end: float
    duration: float


def run(wavPath: str) -> list[SpeakerItem]:
    """Diarize the audio file at *wavPath*.

    Args:
        wavPath: Path of the audio file handed to the pyannote pipeline.

    Returns:
        One ``SpeakerItem`` per track of the resulting annotation, in the
        order yielded by ``Annotation.itertracks``.
    """
    _log("识别语音")
    with ProgressHook() as hook:
        diarization: Annotation = speaker_diarization(wavPath, hook=hook)

    # Use the public itertracks() API instead of the private ``_tracks``
    # mapping; this also keeps every track when several share one segment
    # (the previous code kept only the first label per segment).
    result: list[SpeakerItem] = [
        SpeakerItem(
            speaker=label,
            start=round(segment.start, 3),
            end=round(segment.end, 3),
            duration=round(segment.duration, 3),
        )
        for segment, _track, label in diarization.itertracks(yield_label=True)
    ]
    _log("识别完成")
    return result