42 lines
1.4 KiB
Python
42 lines
1.4 KiB
Python
import os
import time

from pyannote.audio import Pipeline
from pyannote.audio.pipelines import SpeakerDiarization
from pyannote.audio.pipelines.utils.hook import ProgressHook
from pyannote.core import Annotation
from pydantic import BaseModel
|
|
|
|
|
|
# Load the pretrained diarization pipeline once, at import time, and configure
# its hyper-parameters. `run` below reuses this module-level instance.
# Single quotes inside the f-string keep the file parseable before Python 3.12.
print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>加载预训练模型")

# SECURITY NOTE(review): an access token was committed in this file. Prefer
# supplying it through the HF_TOKEN environment variable; the literal is kept
# only as a backward-compatible fallback and should be revoked and removed.
_hf_token = os.environ.get("HF_TOKEN") or "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"

speaker_diarization: SpeakerDiarization = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=_hf_token,
)

# from_pretrained() can return None instead of raising when the pipeline cannot
# be downloaded (e.g. gated model or rejected token); fail with a clear message
# rather than an AttributeError on the next line.
if speaker_diarization is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'; "
        "check the Hugging Face token and that the model's user conditions "
        "have been accepted."
    )

speaker_diarization.instantiate(
    {
        "clustering": {
            "method": "centroid",
            "min_cluster_size": 12,
            "threshold": 0.7045654963945799,
        },
        "segmentation": {
            "min_duration_off": 0.33,
        },
    }
)
|
|
class SpeakerItem(BaseModel):
    """One diarized speech turn attributed to a single speaker."""

    # Label of the speaker the turn is attributed to.
    speaker: str

    # Boundaries of the turn (presumably in seconds -- confirm against the
    # diarization pipeline's output).
    start: float
    end: float

    # Length of the turn, stored alongside its boundaries.
    duration: float
|
|
def run(wavPath: str) -> list[SpeakerItem]:
    """Diarize the audio file at ``wavPath`` and return its speaker turns.

    Args:
        wavPath: Path of the audio file passed to the module-level
            ``speaker_diarization`` pipeline.

    Returns:
        One ``SpeakerItem`` per (segment, track) pair of the resulting
        annotation, in the annotation's iteration order, with ``start``,
        ``end`` and ``duration`` rounded to three decimals.
    """
    # Single quotes inside the f-string keep this parseable before Python 3.12.
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别语音")

    with ProgressHook() as hook:
        diarization: Annotation = speaker_diarization(wavPath, hook=hook)

    # Use the public itertracks() API instead of the private `_tracks` mapping.
    # It yields every track, so two speakers sharing the exact same segment
    # are both reported instead of only the first one.
    result: list[SpeakerItem] = [
        SpeakerItem(
            speaker=label,
            start=round(segment.start, 3),
            end=round(segment.end, 3),
            duration=round(segment.duration, 3),
        )
        for segment, _track, label in diarization.itertracks(yield_label=True)
    ]

    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别完成")
    return result
|