commit c0bf17f8631222155a73c94ac9ffc3577ccd7c29
Author: 小肥羊 <1048382248@qq.com>
Date:   Mon Oct 14 15:48:28 2024 +0800

    初始化

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0cafc1c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.venv/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..dbe1174
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+
+FROM python:3.12.7
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY ./app /code/app
+
+CMD ["fastapi", "run", "app/main.py", "--port", "80"]
\ No newline at end of file
diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc
new file mode 100644
index 0000000..68616f0
Binary files /dev/null and b/app/__pycache__/main.cpython-312.pyc differ
diff --git a/app/__pycache__/speakerAI.cpython-312.pyc b/app/__pycache__/speakerAI.cpython-312.pyc
new file mode 100644
index 0000000..1ac23f1
Binary files /dev/null and b/app/__pycache__/speakerAI.cpython-312.pyc differ
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..4c4d84b
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+
+from fastapi import FastAPI, HTTPException
+
+import speakerAI as speakerAI
+
+app = FastAPI()
+
+
+@app.post("/api/speakerrun", response_model=list[speakerAI.SpeakerItem])
+def speakerRun(path: str):
+    """Run speaker diarization on the audio file at *path*.
+
+    Returns one SpeakerItem per contiguous speech turn.  Raises a 404
+    instead of an opaque 500 when the file does not exist.
+    """
+    if not Path(path).is_file():
+        raise HTTPException(status_code=404, detail=f"file not found: {path}")
+    return speakerAI.run(path)
diff --git a/app/speakerAI.py b/app/speakerAI.py
new file mode 100644
index 0000000..288e381
--- /dev/null
+++ b/app/speakerAI.py
@@ -0,0 +1,60 @@
+import os
+import time
+
+from pyannote.audio import Pipeline
+from pyannote.audio.pipelines import SpeakerDiarization
+from pyannote.audio.pipelines.utils.hook import ProgressHook
+from pyannote.core import Annotation
+from pydantic import BaseModel
+
+
+def _log(message: str) -> None:
+    """Print *message* prefixed with a wall-clock timestamp."""
+    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>{message}")
+
+
+_log("加载预训练模型")
+# SECURITY: the Hugging Face token used to be hard-coded here.  A committed
+# credential must be revoked and supplied via the environment instead.
+speaker_diarization: SpeakerDiarization = Pipeline.from_pretrained(
+    "pyannote/speaker-diarization-3.1",
+    use_auth_token=os.environ["HF_TOKEN"],
+)
+speaker_diarization.instantiate({
+    "clustering": {
+        "method": "centroid",
+        "min_cluster_size": 12,
+        "threshold": 0.7045654963945799
+    },
+    "segmentation": {
+        "min_duration_off": 0.33
+    }
+})
+
+
+class SpeakerItem(BaseModel):
+    # One contiguous speech turn attributed to a single speaker.
+    speaker: str
+    start: float
+    end: float
+    duration: float
+
+
+def run(wavPath: str) -> list[SpeakerItem]:
+    """Diarize the audio file at *wavPath* and return the speaker turns."""
+    _log("识别语音")
+    with ProgressHook() as hook:
+        diarization: Annotation = speaker_diarization(wavPath, hook=hook)
+    result: list[SpeakerItem] = []
+    # Iterate through the public API rather than the private _tracks mapping.
+    for segment, _, label in diarization.itertracks(yield_label=True):
+        result.append(
+            SpeakerItem(
+                speaker=label,
+                start=round(segment.start, 3),
+                end=round(segment.end, 3),
+                duration=round(segment.duration, 3),
+            )
+        )
+    _log("识别完成")
+    return result
diff --git a/app/toONNX.py b/app/toONNX.py
new file mode 100644
index 0000000..dffd190
--- /dev/null
+++ b/app/toONNX.py
@@ -0,0 +1,36 @@
+# pip install torch onnx https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
+import os
+import time
+
+import torch
+from pyannote.audio import Pipeline
+
+print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>加载预训练模型")
+# SECURITY: read the Hugging Face token from the environment; the token that
+# was committed here previously must be revoked.
+model = Pipeline.from_pretrained(
+    "pyannote/speaker-diarization-3.1",
+    use_auth_token=os.environ["HF_TOKEN"],
+).eval()
+
+# NOTE(review): Pipeline is not a plain torch.nn.Module, so calling .eval() and
+# exporting the whole pipeline with torch.onnx.export is unlikely to succeed;
+# the underlying segmentation/embedding models likely need exporting — confirm.
+dummy_input = torch.zeros(2, 1, 160000)
+torch.onnx.export(
+    model,
+    # model input
+    dummy_input,
+    # output file name
+    'speaker@3_1.onnx',
+    # apply constant-folding optimization
+    do_constant_folding=True,
+    input_names=["input"],
+    output_names=["output"],
+    dynamic_axes={
+        "input": {0: "batch_size", 1: "num_channels", 2: "num_samples"},
+        "output": {0: "batch_size", 1: "num_frames"},
+    },
+)
+
+print(f"任务完成")
diff --git a/file/output_000.wav b/file/output_000.wav
new file mode 100644
index 0000000..a07260c
Binary files /dev/null and b/file/output_000.wav differ
diff --git a/file/讲话人1-2.m4a.wav b/file/讲话人1-2.m4a.wav
new file mode 100644
index 0000000..178e399
Binary files /dev/null and b/file/讲话人1-2.m4a.wav differ
diff --git a/file/讲话人1-3.m4a.wav b/file/讲话人1-3.m4a.wav
new file mode 100644
index 0000000..8bb9d5e
Binary files /dev/null and b/file/讲话人1-3.m4a.wav differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..31dd052
Binary files /dev/null and b/requirements.txt differ
diff --git a/run.sh b/run.sh
new file mode 100644
index 0000000..813eb78
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,2 @@
+pip install -r requirements.txt
+fastapi run ./app/main.py
\ No newline at end of file
diff --git a/运行环境 b/运行环境
new file mode 100644
index 0000000..a79d352
--- /dev/null
+++ b/运行环境
@@ -0,0 +1 @@
+python 3.12.7