初始化

This commit is contained in:
小肥羊 2024-10-14 15:48:28 +08:00
commit c0bf17f863
13 changed files with 92 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.venv/

12
Dockerfile Normal file
View File

@ -0,0 +1,12 @@
# Container image for the speaker-diarization FastAPI service.
# Pins the interpreter to 3.12.7 (matches the 运行环境 note in this repo);
# the code uses PEP 701 f-strings, so 3.12+ is required.
FROM python:3.12.7
WORKDIR /code
# Copy requirements first so dependency installation is cached as its own
# layer and is not re-run on every app-code change.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
COPY ./app /code/app
# Serve app/main.py via the fastapi CLI on port 80.
CMD ["fastapi", "run", "app/main.py", "--port", "80"]

Binary file not shown.

Binary file not shown.

9
app/main.py Normal file
View File

@ -0,0 +1,9 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import speakerAI as speakerAI
app = FastAPI()


@app.post("/api/speakerrun", response_model=list[speakerAI.SpeakerItem])
def speakerRun(path: str):
    """Diarize the audio file at *path* and return its speaker segments.

    *path* arrives as a query parameter; the result is the list of
    ``SpeakerItem`` records produced by :func:`speakerAI.run`.
    """
    return speakerAI.run(path)

41
app/speakerAI.py Normal file
View File

@ -0,0 +1,41 @@
import time
from pyannote.audio import Pipeline
from pyannote.audio.pipelines import SpeakerDiarization
from pyannote.core import Annotation
from pyannote.audio.pipelines.utils.hook import ProgressHook
from pydantic import BaseModel
# Load the pretrained diarization pipeline once at import time so every call
# to run() reuses the same model instance.
# BUG FIX: the original line assigned print(...) — which returns None — to
# speaker_diarization under a SpeakerDiarization annotation; the log call and
# the assignment are now separate statements.
print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>加载预训练模型")
# SECURITY NOTE(review): the Hugging Face access token is hard-coded in
# source (and now public) — revoke it and read it from an environment
# variable instead.
speaker_diarization: SpeakerDiarization = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token="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",
)
# Clustering/segmentation hyper-parameters; presumably tuned offline — the
# tuning source is not visible here.
speaker_diarization.instantiate({
    "clustering": {
        "method": "centroid",
        "min_cluster_size": 12,
        "threshold": 0.7045654963945799
    },
    "segmentation": {
        "min_duration_off": 0.33
    }
})
class SpeakerItem(BaseModel):
    """One diarized speech segment attributed to a single speaker."""
    # Diarization label for the speaker (e.g. "SPEAKER_00").
    speaker: str
    # Segment start time in seconds (rounded to 3 decimals by run()).
    start: float
    # Segment end time in seconds (rounded to 3 decimals by run()).
    end: float
    # Segment length in seconds (rounded to 3 decimals by run()).
    duration: float
def run(wavPath: str) -> list[SpeakerItem]:
    """Diarize the WAV file at *wavPath* into per-speaker segments.

    Runs the module-level ``speaker_diarization`` pipeline with a console
    progress hook and converts each (segment, speaker-label) pair into a
    :class:`SpeakerItem` with times rounded to 3 decimals.
    """
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别语音")
    with ProgressHook() as hook:
        diarization: Annotation = speaker_diarization(wavPath, hook=hook)
    # FIX: use the public itertracks() API instead of reading the private
    # Annotation._tracks mapping, which is an implementation detail and may
    # change between pyannote versions. It yields the same
    # (segment, track, label) triples in chronological order.
    result: list[SpeakerItem] = []
    for segment, _track, label in diarization.itertracks(yield_label=True):
        result.append(
            SpeakerItem(
                speaker=label,
                start=round(segment.start, 3),
                end=round(segment.end, 3),
                duration=round(segment.duration, 3),
            )
        )
    print(f"{time.strftime('%Y/%m/%d %H:%M:%S')}=>识别完成")
    return result

26
app/toONNX.py Normal file
View File

@ -0,0 +1,26 @@
# One-shot script: export the pyannote speaker-diarization pipeline to ONNX.
# Setup: pip install torch onnx https://github.com/pyannote/pyannote-audio/archive/refs/heads/develop.zip
import torch
from pyannote.audio import Pipeline
import time
print(f"{time.strftime("%Y/%m/%d %H:%M:%S")}=>加载预训练模型")
# SECURITY NOTE(review): access token hard-coded in source — revoke it and
# load it from an environment variable.
# NOTE(review): Pipeline is not obviously an nn.Module; torch.onnx.export
# expects a torch.nn.Module (or ScriptModule) — verify this export actually
# runs, or export the pipeline's underlying segmentation/embedding models
# individually instead.
model = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
                                 use_auth_token="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").eval()
# Dummy input tensor; presumably (batch=2, channels=1, samples=160000),
# i.e. 10 s of 16 kHz mono audio — TODO confirm against the model's spec.
dummy_input = torch.zeros(2, 1, 160000)
torch.onnx.export(
    model,
    # example input used to trace the graph
    dummy_input,
    # output file name
    'speaker@3_1.onnx',
    # apply constant-folding optimization
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    # mark batch/channel/sample dims dynamic so the graph accepts any length
    dynamic_axes={
        "input": {0: "batch_size", 1: "num_channels", 2: "num_samples"},
        "output": {0: "batch_size", 1: "num_frames"},
    },
)
print(f"任务完成")

BIN
file/output_000.wav Normal file

Binary file not shown.

BIN
file/讲话人1-2.m4a.wav Normal file

Binary file not shown.

BIN
file/讲话人1-3.m4a.wav Normal file

Binary file not shown.

BIN
requirements.txt Normal file

Binary file not shown.

2
run.sh Normal file
View File

@ -0,0 +1,2 @@
#!/usr/bin/env sh
# Install dependencies and launch the FastAPI app.
# FIX: abort immediately if the install fails instead of starting a server
# with missing dependencies.
set -e
pip install -r requirements.txt
fastapi run ./app/main.py

1
运行环境 Normal file
View File

@ -0,0 +1 @@
python 3.12.7