From 0d840bbb73986d9a8c026753248883d9d787d8ca Mon Sep 17 00:00:00 2001 From: Katharine Berry Date: Sat, 30 Jun 2018 15:20:01 -0700 Subject: [PATCH] Make it run in production. --- Procfile | 1 + asr/__init__.py | 80 +++++++++++++++++++++++++----------------------- requirements.txt | 19 ++++++++++++ runtime.txt | 1 + 4 files changed, 63 insertions(+), 38 deletions(-) create mode 100644 Procfile create mode 100644 requirements.txt create mode 100644 runtime.txt diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..2097dff --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: gunicorn -k gevent -b 0.0.0.0:$PORT asr:app diff --git a/asr/__init__.py b/asr/__init__.py index e741ddd..b5a1050 100644 --- a/asr/__init__.py +++ b/asr/__init__.py @@ -1,14 +1,20 @@ +import gevent.monkey +gevent.monkey.patch_all() +import base64 from email.mime.multipart import MIMEMultipart from email.message import Message import json import struct +import os -from flask import Flask, request, Response -from google.cloud import speech - +import requests +from flask import Flask, request, Response, abort app = Flask(__name__) +AUTH_URL = "https://auth.rebble.io" +API_KEY = os.environ['SPEECH_API_KEY'] + # We know gunicorn does this, but it doesn't *say* it does this, so we must signal it manually. @app.before_request @@ -18,7 +24,6 @@ def handle_chunking(): def parse_chunks(stream): boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip() # super lazy/brittle parsing. - print("Boundary: " + boundary.decode('utf-8')) this_frame = b'' while True: content = stream.read(4096) @@ -28,7 +33,6 @@ def parse_chunks(stream): frame = this_frame[:end] if frame != b'': header, content = frame.split(b'\r\n\r\n', 1) - print(content) yield content[:-2] this_frame = this_frame[end + len(boundary):] if content == b'': @@ -36,44 +40,45 @@ def parse_chunks(stream): break -def parse_data(): - boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip() # super lazy/brittle parsing. - parts = request.data.split(boundary) - for part in parts: - if part == b'': - continue - yield part.split(b'\r\n\r\n', 1)[1][:-2] - - @app.route('/NmspServlet/', methods=["POST"]) def recognise(): - - client = speech.SpeechClient() stream = request.stream + + access_token, part1, part2 = request.host.split('.', 1)[0].split('-', 3) + lang = f"{part1}-{part2.upper()}" + + auth_req = requests.get(f"{AUTH_URL}/api/v1/me/token", headers={'Authorization': f"Bearer {access_token}"}) + if not auth_req.ok: + abort(401) + chunks = iter(list(parse_chunks(stream))) content = next(chunks).decode('utf-8') - print(content) - config = speech.types.RecognitionConfig( - encoding='SPEEX_WITH_HEADER_BYTE', - language_code='en-US', - sample_rate_hertz=16000, - ) - print('beginning request') - responses = client.streaming_recognize( - config=speech.types.StreamingRecognitionConfig(config=config), - requests=( - speech.types.StreamingRecognizeRequest(audio_content=struct.pack('B', len(x)) + x) - for x in chunks)) - print('finished request') + body = { + 'config': { + 'encoding': 'SPEEX_WITH_HEADER_BYTE', + 'language_code': lang, + 'sample_rate_hertz': 16000, + 'max_alternatives': 1, + # 'metadata': { + # 'interaction_type': 'DICTATION', + # 'microphone_distance': 'NEARFIELD', + # }, + }, + 'audio': { + 'content': base64.b64encode(b''.join((struct.pack('B', len(x)) + x for x in chunks))).decode('utf-8'), + }, + } + result = requests.post(f'https://speech.googleapis.com/v1/speech:recognize?key={API_KEY}', json=body) + result.raise_for_status() + words = [] - for response in responses: - if response.results: - for result in response.results: - words.extend({ - 'word': x, - 'confidence': result.alternatives[0].confidence - } for x in result.alternatives[0].transcript.split(' ')) + if 'results' in result.json(): + for result in result.json()['results']: + words.extend({ + 'word': x, + 'confidence': result['alternatives'][0]['confidence'] + } for x in result['alternatives'][0]['transcript'].split(' ')) # Now for some reason we also need to give back a mime/multipart message... parts = MIMEMultipart() @@ -83,6 +88,7 @@ def recognise(): if len(words) > 0: response_part.add_header('Content-Disposition', 'form-data; name="QueryResult"') words[0]['word'] += '\\*no-space-before' + words[0]['word'] = words[0]['word'][0].upper() + words[0]['word'][1:] response_part.set_payload(json.dumps({ 'words': [words], })) @@ -96,10 +102,8 @@ def recognise(): "Prompt": "Sorry, speech not recognized. Please try again." })) parts.attach(response_part) - print(parts.as_string()) response = Response(parts.as_string().split("\n", 3)[3]) response.headers['Content-Type'] = f'multipart/form-data; boundary={parts.get_boundary()}' - response.headers['Connection'] = 'close' return response diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2df9200 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,19 @@ +certifi==2018.4.16 +chardet==3.0.4 +click==6.7 +Flask==1.0.2 +gevent==1.3.4 +greenlet==0.4.13 +gunicorn==19.8.1 +idna==2.7 +itsdangerous==0.24 +Jinja2==2.10 +MarkupSafe==1.0 +pyasn1==0.4.3 +pyasn1-modules==0.2.2 +pytz==2018.4 +requests==2.19.1 +rsa==3.4.2 +six==1.11.0 +urllib3==1.23 +Werkzeug==0.14.1 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000..1935e97 --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-3.6.6