Use the latest_short model where applicable.

2023-08-02 19:02:15 -07:00 · 2023-08-02 19:02:15 -07:00 · 141910548a
commit 141910548a
parent 37302ebed4
2 changed files with 49 additions and 2 deletions
--- a/asr/init.py
+++ b/asr/init.py
@ -1,10 +1,9 @@
-import uuid
-
 import gevent.monkey
 gevent.monkey.patch_all()
 import base64
 from email.mime.multipart import MIMEMultipart
 from email.message import Message
+from .model_map import get_model_for_lang
 import json
 import struct
 import os
@ -69,6 +68,7 @@ def recognise():
            'sample_rate_hertz': 16000,
            'max_alternatives': 1,
            'enableAutomaticPunctuation': True,
+            'model': get_model_for_lang(lang),
            # 'metadata': {
            #     'interaction_type': 'DICTATION',
            #     'microphone_distance': 'NEARFIELD',
--- a/asr/model_map.py
+++ b/asr/model_map.py
@ -0,0 +1,47 @@
+MODEL_MAP = {
+    'af-za': 'default',  # latest_short not supported
+    'cs-cz': 'latest_short',
+    'da-dk': 'latest_short',
+    'de-de': 'latest_short',
+    'en-au': 'latest_short',
+    'en-us': 'latest_short',
+    'en-gb': 'latest_short',
+    'en-in': 'latest_short',
+    'en-ca': 'default',  # latest_short not supported
+    'fi-fi': 'latest_short',
+    'fil-ph': 'default',  # latest_short not supported
+    'fr-ca': 'latest_short',
+    'fr-fr': 'latest_short',
+    'gl-es': 'default',  # latest_short not supported
+    'id-id': 'latest_short',
+    'is-is': 'default',  # latest_short not supported
+    'it-it': 'latest_short',
+    'ko-kr': 'latest_short',
+    'lv-lv': 'default',  # latest_short not supported
+    'lt-lt': 'default',  # latest_short not supported
+    'hr-hr': 'default',  # latest_short not supported
+    'hu-hu': 'default',  # latest_short not supported
+    'ms-my': 'default',  # latest_short not supported
+    'nl-nl': 'latest_short',
+    'nb-no': 'default',  # this isn't listed at all. Did we mean no_NO?
+    'no-no': 'latest_short',
+    'pt-pt': 'latest_short',
+    'pl-pl': 'latest_short',
+    'ro-ro': 'latest_short',
+    'ru-ru': 'latest_short',
+    'es-es': 'latest_short',
+    'es-mx': 'default',  # latest_short not supported
+    'es-us': 'latest_short',
+    'sk-sk': 'default',  # latest_short not supported
+    'sl-sl': 'default',  # this doesn't seem to exist
+    'sl-si': 'default',  # latest_short not supported
+    'sv-se': 'latest_short',
+    'sw-tz': 'default',  # latest_short not supported
+    'sw-ke': 'default',  # latest_short not supported
+    'tr-tr': 'default',  # latest_short lacks automatic punctuation
+    'zu-za': 'default',  # latest_short not supported
+}
+
+
+def get_model_for_lang(code: str) -> str:
+    return MODEL_MAP.get(code.lower(), 'default')