Skip to content

Commit 3060a85

Browse files
committed
1. Add timestamp information to the returned result. 2. Automatically load the model when the service initializes.
1 parent c261a02 commit 3060a85

4 files changed

Lines changed: 39 additions & 16 deletions

File tree

Dockerfile

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
FROM registry.cn-hangzhou.aliyuncs.com/modelscope-repo/modelscope:ubuntu20.04-cuda11.3.0-py37-torch1.11.0-tf1.15.5-1.6.0
2-
WORKDIR /modelscope/pytorch
1+
FROM paidax/dev-containers:modelscope-v0.6
2+
3+
ARG HTTP_PROXY
4+
ENV HTTP_PROXY=${HTTP_PROXY}
5+
ENV HTTPS_PROXY=${HTTP_PROXY}
6+
7+
WORKDIR /home/funasr
38

49
RUN pip install --no-cache-dir \
510
loguru \
@@ -11,5 +16,9 @@ RUN pip install --no-cache-dir \
1116

1217
COPY . .
1318

14-
RUN python download_model.py
19+
RUN python download_model.py && \
20+
git clone https://github.com/alibaba/FunASR.git && \
21+
cd FunASR && \
22+
pip install -e ./
1523

24+
WORKDIR /home/funasr

docker-compose.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ services:
44
build:
55
context: .
66
dockerfile: Dockerfile
7-
image: paidax/funasr_python:0.3.2
7+
args:
8+
HTTP_PROXY: ${HTTP_PROXY}
9+
image: paidax/funasr_python:0.4.2
810
runtime: nvidia
911
ports:
1012
- 9527:9527
11-
command: uvicorn main:app --port 9527 --host 0.0.0.0
13+
command: tail -f /dev/null

download_model.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from modelscope.hub.snapshot_download import snapshot_download
22

3-
snapshot_download("damo/speech_fsmn_vad_zh-cn-16k-common-pytorch")
4-
snapshot_download("damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch")
5-
snapshot_download("damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch")
3+
snapshot_download("damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch") # 长音频模型
4+
snapshot_download("damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch") # 标点符号模型
5+
snapshot_download("damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch") # 语音端点检测模型
6+
snapshot_download("damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch")

main.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,11 @@ def initialize_model(model_type, hotword):
3535
loaded_model["model_type"] = "normal"
3636
model = pipeline(
3737
task=Tasks.auto_speech_recognition,
38-
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
3938
model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
39+
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
4040
# lm_model='damo/speech_transformer_lm_zh-cn-common-vocab8404-pytorch',
41-
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
41+
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
42+
timestamp_model="damo/speech_timestamp_prediction-v1-16k-offline"
4243
)
4344
elif model_type == "long":
4445
log.debug("lodding model: long")
@@ -72,6 +73,10 @@ def load_model(model_type, hotword):
7273
if loaded_model["model_type"] is None or loaded_model["model_type"] != model_type or (loaded_model["model_type"] == "hotword" and hotword_parm["hotword"] != hotword):
7374
loaded_model["model"] = initialize_model(model_type, hotword)
7475

76+
@app.on_event("startup")
77+
async def startup_event():
78+
load_model(model_type="long", hotword=None)
79+
rec_result = loaded_model["model"](audio_in="./16000_001.wav")
7580

7681
@app.post("/asr", tags=["ASR"], summary="聚合ASR模型接口服务")
7782
async def predict(items: Audio):
@@ -93,14 +98,20 @@ async def predict(items: Audio):
9398
log.info(f"Received a url in string, url: {items.file}")
9499
decoded_data = requests.get(items.file).content
95100

96-
load_model(model_type=items.model_type, hotword=items.hotword)
101+
load_model(model_type="long", hotword=items.hotword)
97102
rec_result = loaded_model["model"](audio_in=decoded_data)
98-
if items.model_type=='normal' or items.model_type=='long':
99-
rec_result = {
100-
"text": rec_result["text"]
101-
}
103+
result = []
102104
log.info(rec_result)
103-
return rec_result
105+
for sentence in rec_result["sentences"]:
106+
result.append(
107+
{
108+
"text": sentence["text"],
109+
"start": sentence["start"] / 1000.0,
110+
"end": sentence["end"] / 1000.0
111+
}
112+
)
113+
log.info(result)
114+
return result
104115

105116
@app.get("/health")
106117
async def health_check():

0 commit comments

Comments
 (0)