初始化
This commit is contained in:
commit
c0bf17f863
|
|
@ -0,0 +1 @@
|
|||
.venv/
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
|
||||
# Container image for the speaker-diarization FastAPI service.
# NOTE: nested same-quote f-strings in the app require Python >= 3.12.
FROM python:3.12.7

# All subsequent paths are relative to /code inside the container.
WORKDIR /code

# Copy requirements first so the pip layer is cached across
# rebuilds that only change application code.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY ./app /code/app

# Serve the app on port 80 via the fastapi CLI.
CMD ["fastapi", "run", "app/main.py", "--port", "80"]
|
||||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,9 @@
|
|||
"""HTTP entry point exposing speaker diarization as a REST endpoint."""
from fastapi import FastAPI

import speakerAI

app = FastAPI()


@app.post("/api/speakerrun", response_model=list[speakerAI.SpeakerItem])
def speakerRun(path: str):
    """Run speaker diarization on the audio file at *path*.

    Args:
        path: Server-local path to an audio file (FastAPI query parameter).

    Returns:
        A list of SpeakerItem segments (speaker label, start, end, duration).
    """
    # SECURITY NOTE(review): *path* is a caller-supplied filesystem path
    # passed straight to the model; restrict it to a known directory before
    # exposing this endpoint publicly.
    return speakerAI.run(path)
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
import time
|
||||
from pyannote.audio import Pipeline
|
||||
from pyannote.audio.pipelines import SpeakerDiarization
|
||||
from pyannote.core import Annotation
|
||||
from pyannote.audio.pipelines.utils.hook import ProgressHook
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# BUG FIX: the original bound the return value of print() — i.e. None — to
# `speaker_diarization` under a misleading SpeakerDiarization annotation;
# the log call and the pipeline assignment are now separate statements.
# (Inner strftime quotes switched to single quotes: same output, and no
# dependence on the Python 3.12-only nested-quote f-string syntax.)
print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>加载预训练模型")

# Load the pretrained diarization pipeline once at import time so every
# request reuses the same model instance.
# SECURITY NOTE(review): the Hugging Face token is hard-coded in source;
# move it to an environment variable and revoke this token.
speaker_diarization: SpeakerDiarization = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token="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",
)

# Fixed hyperparameters for clustering/segmentation; presumably tuned
# offline for the target audio domain — confirm before changing.
speaker_diarization.instantiate({
    "clustering": {
        "method": "centroid",
        "min_cluster_size": 12,
        "threshold": 0.7045654963945799,
    },
    "segmentation": {
        "min_duration_off": 0.33,
    },
})
|
||||
class SpeakerItem(BaseModel):
    """One diarized speech segment attributed to a single speaker."""

    # Speaker label assigned by the pipeline (e.g. "SPEAKER_00").
    speaker: str
    # Segment boundaries in seconds from the start of the audio.
    start: float
    end: float
    # Segment length in seconds — presumably end - start; see run().
    duration: float
|
||||
def run(wavPath: str) -> list[SpeakerItem]:
    """Diarize the audio file at *wavPath* into per-speaker segments.

    Args:
        wavPath: Path to an audio file readable by pyannote.

    Returns:
        One SpeakerItem per speaker turn, with times rounded to 3 decimals.
    """
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别语音")
    with ProgressHook() as hook:
        diarization: Annotation = speaker_diarization(wavPath, hook=hook)

    # Use the public itertracks() API instead of the private
    # Annotation._tracks mapping. The original also kept only the FIRST
    # label of each segment (next(iter(value.values()))), silently dropping
    # any co-occurring track; yield_label=True yields every
    # (segment, track_id, speaker_label) triple in chronological order.
    result: list[SpeakerItem] = []
    for segment, _, label in diarization.itertracks(yield_label=True):
        result.append(
            SpeakerItem(
                speaker=label,
                start=round(segment.start, 3),
                end=round(segment.end, 3),
                duration=round(segment.duration, 3),
            )
        )
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别完成")
    return result
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# pip install torch onnx https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
# One-off script: attempt to export the pyannote speaker-diarization
# pipeline to ONNX.
import torch
from pyannote.audio import Pipeline
import time
# Timestamped progress log ("loading pretrained model").
print(f"{time.strftime("%Y/%m/%d %H:%M:%S")}=>加载预训练模型")
# SECURITY NOTE(review): hard-coded Hugging Face token — move to an
# environment variable and revoke this token.
# NOTE(review): Pipeline is a pyannote pipeline object, not a
# torch.nn.Module; whether .eval() exists on it and whether
# torch.onnx.export accepts it as written looks doubtful — confirm.
model = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
use_auth_token="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").eval()

# Dummy waveform of shape (batch=2, channels=1, samples=160000) —
# presumably 10 s of 16 kHz audio; confirm against the model's input spec.
dummy_input = torch.zeros(2, 1, 160000)
torch.onnx.export(
model,
# model input
dummy_input,
# output file name
'speaker@3_1.onnx',
# apply constant-folding optimization
do_constant_folding=True,
input_names=["input"],
output_names=["output"],
# mark batch/channel/sample axes as dynamic so the exported graph
# accepts variable-length audio.
dynamic_axes={
"input": {0: "batch_size", 1: "num_channels", 2: "num_samples"},
"output": {0: "batch_size", 1: "num_frames"},
},
)

# "task finished"
print(f"任务完成")
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,2 @@
|
|||
# Install dependencies, then launch the FastAPI service locally.
pip install -r requirements.txt
fastapi run ./app/main.py
|
||||
Loading…
Reference in New Issue