初始化

This commit is contained in:
小肥羊 2024-10-14 15:48:28 +08:00
commit c0bf17f863
13 changed files with 92 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.venv/

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Container image for the speaker-diarization FastAPI service.
# Pins the interpreter to 3.12.7 (matches the 运行环境 note in this repo);
# the code uses PEP 701 f-strings, so 3.12+ is required.
FROM python:3.12.7
WORKDIR /code
# Copy requirements first so dependency installation is cached as its own
# layer and is not re-run on every app-code change.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
COPY ./app /code/app
# Serve app/main.py via the fastapi CLI on port 80.
CMD ["fastapi", "run", "app/main.py", "--port", "80"]

Binary file not shown.

Binary file not shown.

9
app/main.py Normal file
View File

@ -0,0 +1,9 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import speakerAI as speakerAI
app = FastAPI()


@app.post("/api/speakerrun", response_model=list[speakerAI.SpeakerItem])
def speakerRun(path: str):
    """Diarize the audio file at *path* and return its speaker segments.

    *path* arrives as a query parameter; the result is the list of
    ``SpeakerItem`` records produced by :func:`speakerAI.run`.
    """
    return speakerAI.run(path)

41
app/speakerAI.py Normal file
View File

@ -0,0 +1,41 @@
import time
from pyannote.audio import Pipeline
from pyannote.audio.pipelines import SpeakerDiarization
from pyannote.core import Annotation
from pyannote.audio.pipelines.utils.hook import ProgressHook
from pydantic import BaseModel
# Load the pretrained diarization pipeline once at import time so every call
# to run() reuses the same model instance.
# BUG FIX: the original line assigned print(...) — which returns None — to
# speaker_diarization under a SpeakerDiarization annotation; the log call and
# the assignment are now separate statements.
print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>加载预训练模型")
# SECURITY NOTE(review): the Hugging Face access token is hard-coded in
# source (and now public) — revoke it and read it from an environment
# variable instead.
speaker_diarization: SpeakerDiarization = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token="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",
)
# Clustering/segmentation hyper-parameters; presumably tuned offline — the
# tuning source is not visible here.
speaker_diarization.instantiate({
    "clustering": {
        "method": "centroid",
        "min_cluster_size": 12,
        "threshold": 0.7045654963945799
    },
    "segmentation": {
        "min_duration_off": 0.33
    }
})
class SpeakerItem(BaseModel):
    """One diarized speech segment attributed to a single speaker."""
    # Diarization label for the speaker (e.g. "SPEAKER_00").
    speaker: str
    # Segment start time in seconds (rounded to 3 decimals by run()).
    start: float
    # Segment end time in seconds (rounded to 3 decimals by run()).
    end: float
    # Segment length in seconds (rounded to 3 decimals by run()).
    duration: float
def run(wavPath: str) -> list[SpeakerItem]:
    """Diarize the WAV file at *wavPath* into per-speaker segments.

    Runs the module-level ``speaker_diarization`` pipeline with a console
    progress hook and converts each (segment, speaker-label) pair into a
    :class:`SpeakerItem` with times rounded to 3 decimals.
    """
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别语音")
    with ProgressHook() as hook:
        diarization: Annotation = speaker_diarization(wavPath, hook=hook)
    # FIX: use the public itertracks() API instead of reading the private
    # Annotation._tracks mapping, which is an implementation detail and may
    # change between pyannote versions. It yields the same
    # (segment, track, label) triples in chronological order.
    result: list[SpeakerItem] = []
    for segment, _track, label in diarization.itertracks(yield_label=True):
        result.append(
            SpeakerItem(
                speaker=label,
                start=round(segment.start, 3),
                end=round(segment.end, 3),
                duration=round(segment.duration, 3),
            )
        )
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别完成")
    return result

26
app/toONNX.py Normal file
View File

@ -0,0 +1,26 @@
# One-shot script: export the pyannote speaker-diarization pipeline to ONNX.
# Setup: pip install torch onnx https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
import torch
from pyannote.audio import Pipeline
import time
print(f"{time.strftime("%Y/%m/%d %H:%M:%S")}=>加载预训练模型")
# SECURITY NOTE(review): access token hard-coded in source — revoke it and
# load it from an environment variable.
# NOTE(review): Pipeline is not obviously an nn.Module; torch.onnx.export
# expects a torch.nn.Module (or ScriptModule) — verify this export actually
# runs, or export the pipeline's underlying segmentation/embedding models
# individually instead.
model = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
                                 use_auth_token="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").eval()
# Dummy input tensor; presumably (batch=2, channels=1, samples=160000),
# i.e. 10 s of 16 kHz mono audio — TODO confirm against the model's spec.
dummy_input = torch.zeros(2, 1, 160000)
torch.onnx.export(
    model,
    # example input used to trace the graph
    dummy_input,
    # output file name
    'speaker@3_1.onnx',
    # apply constant-folding optimization
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    # mark batch/channel/sample dims dynamic so the graph accepts any length
    dynamic_axes={
        "input": {0: "batch_size", 1: "num_channels", 2: "num_samples"},
        "output": {0: "batch_size", 1: "num_frames"},
    },
)
print(f"任务完成")

BIN
file/output_000.wav Normal file

Binary file not shown.

BIN
file/讲话人1-2.m4a.wav Normal file

Binary file not shown.

BIN
file/讲话人1-3.m4a.wav Normal file

Binary file not shown.

BIN
requirements.txt Normal file

Binary file not shown.

2
run.sh Normal file
View File

@ -0,0 +1,2 @@
#!/usr/bin/env sh
# Install dependencies and launch the FastAPI app.
# FIX: abort immediately if the install fails instead of starting a server
# with missing dependencies.
set -e
pip install -r requirements.txt
fastapi run ./app/main.py

1
运行环境 Normal file
View File

@ -0,0 +1 @@
python 3.12.7