Make it run in production.

This commit is contained in:
Katharine Berry 2018-06-30 15:20:01 -07:00
parent 054b77cfd7
commit 0d840bbb73
4 changed files with 63 additions and 38 deletions

1
Procfile Normal file
View file

@ -0,0 +1 @@
web: gunicorn -k gevent -b 0.0.0.0:$PORT asr:app

View file

@ -1,14 +1,20 @@
import gevent.monkey
gevent.monkey.patch_all()
import base64
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.message import Message from email.message import Message
import json import json
import struct import struct
import os
from flask import Flask, request, Response import requests
from google.cloud import speech from flask import Flask, request, Response, abort
app = Flask(__name__) app = Flask(__name__)
AUTH_URL = "https://auth.rebble.io"
API_KEY = os.environ['SPEECH_API_KEY']
# We know gunicorn does this, but it doesn't *say* it does this, so we must signal it manually. # We know gunicorn does this, but it doesn't *say* it does this, so we must signal it manually.
@app.before_request @app.before_request
@ -18,7 +24,6 @@ def handle_chunking():
def parse_chunks(stream): def parse_chunks(stream):
boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip() # super lazy/brittle parsing. boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip() # super lazy/brittle parsing.
print("Boundary: " + boundary.decode('utf-8'))
this_frame = b'' this_frame = b''
while True: while True:
content = stream.read(4096) content = stream.read(4096)
@ -28,7 +33,6 @@ def parse_chunks(stream):
frame = this_frame[:end] frame = this_frame[:end]
if frame != b'': if frame != b'':
header, content = frame.split(b'\r\n\r\n', 1) header, content = frame.split(b'\r\n\r\n', 1)
print(content)
yield content[:-2] yield content[:-2]
this_frame = this_frame[end + len(boundary):] this_frame = this_frame[end + len(boundary):]
if content == b'': if content == b'':
@ -36,44 +40,45 @@ def parse_chunks(stream):
break break
def parse_data():
    """Yield the payload of each part of the buffered multipart request body.

    The boundary is read from the ``boundary=`` parameter of the request's
    Content-Type header and ``request.data`` is split on ``--<boundary>``.
    For every piece that contains a header block, everything up to and
    including the first blank line is discarded, the final two bytes (the
    CRLF that precedes the next delimiter) are dropped, and the remaining
    bytes are yielded.

    Pieces without a header/body separator are skipped instead of raising
    ``IndexError``; this covers the empty piece before the first delimiter
    and, e.g., the ``--`` that a closing ``--<boundary>--`` delimiter
    leaves behind after the split.
    """
    # Still lazy/brittle parsing: assumes the boundary is the second
    # ';'-separated parameter of Content-Type and that it is not quoted.
    boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip()
    for part in request.data.split(boundary):
        _headers, separator, payload = part.partition(b'\r\n\r\n')
        if not separator:
            # No header block in this piece, so there is no payload to emit.
            continue
        # Strip the CRLF that sits between the payload and the next delimiter.
        yield payload[:-2]
@app.route('/NmspServlet/', methods=["POST"]) @app.route('/NmspServlet/', methods=["POST"])
def recognise(): def recognise():
client = speech.SpeechClient()
stream = request.stream stream = request.stream
access_token, part1, part2 = request.host.split('.', 1)[0].split('-', 3)
lang = f"{part1}-{part2.upper()}"
auth_req = requests.get(f"{AUTH_URL}/api/v1/me/token", headers={'Authorization': f"Bearer {access_token}"})
if not auth_req.ok:
abort(401)
chunks = iter(list(parse_chunks(stream))) chunks = iter(list(parse_chunks(stream)))
content = next(chunks).decode('utf-8') content = next(chunks).decode('utf-8')
print(content)
config = speech.types.RecognitionConfig( body = {
encoding='SPEEX_WITH_HEADER_BYTE', 'config': {
language_code='en-US', 'encoding': 'SPEEX_WITH_HEADER_BYTE',
sample_rate_hertz=16000, 'language_code': lang,
) 'sample_rate_hertz': 16000,
print('beginning request') 'max_alternatives': 1,
responses = client.streaming_recognize( # 'metadata': {
config=speech.types.StreamingRecognitionConfig(config=config), # 'interaction_type': 'DICTATION',
requests=( # 'microphone_distance': 'NEARFIELD',
speech.types.StreamingRecognizeRequest(audio_content=struct.pack('B', len(x)) + x) # },
for x in chunks)) },
print('finished request') 'audio': {
'content': base64.b64encode(b''.join((struct.pack('B', len(x)) + x for x in chunks))).decode('utf-8'),
},
}
result = requests.post(f'https://speech.googleapis.com/v1/speech:recognize?key={API_KEY}', json=body)
result.raise_for_status()
words = [] words = []
for response in responses: if 'results' in result.json():
if response.results: for result in result.json()['results']:
for result in response.results: words.extend({
words.extend({ 'word': x,
'word': x, 'confidence': result['alternatives'][0]['confidence']
'confidence': result.alternatives[0].confidence } for x in result['alternatives'][0]['transcript'].split(' '))
} for x in result.alternatives[0].transcript.split(' '))
# Now for some reason we also need to give back a mime/multipart message... # Now for some reason we also need to give back a mime/multipart message...
parts = MIMEMultipart() parts = MIMEMultipart()
@ -83,6 +88,7 @@ def recognise():
if len(words) > 0: if len(words) > 0:
response_part.add_header('Content-Disposition', 'form-data; name="QueryResult"') response_part.add_header('Content-Disposition', 'form-data; name="QueryResult"')
words[0]['word'] += '\\*no-space-before' words[0]['word'] += '\\*no-space-before'
words[0]['word'] = words[0]['word'][0].upper() + words[0]['word'][1:]
response_part.set_payload(json.dumps({ response_part.set_payload(json.dumps({
'words': [words], 'words': [words],
})) }))
@ -96,10 +102,8 @@ def recognise():
"Prompt": "Sorry, speech not recognized. Please try again." "Prompt": "Sorry, speech not recognized. Please try again."
})) }))
parts.attach(response_part) parts.attach(response_part)
print(parts.as_string())
response = Response(parts.as_string().split("\n", 3)[3]) response = Response(parts.as_string().split("\n", 3)[3])
response.headers['Content-Type'] = f'multipart/form-data; boundary={parts.get_boundary()}' response.headers['Content-Type'] = f'multipart/form-data; boundary={parts.get_boundary()}'
response.headers['Connection'] = 'close'
return response return response

19
requirements.txt Normal file
View file

@ -0,0 +1,19 @@
certifi==2018.4.16
chardet==3.0.4
click==6.7
Flask==1.0.2
gevent==1.3.4
greenlet==0.4.13
gunicorn==19.8.1
idna==2.7
itsdangerous==0.24
Jinja2==2.10
MarkupSafe==1.0
pyasn1==0.4.3
pyasn1-modules==0.2.2
pytz==2018.4
requests==2.19.1
rsa==3.4.2
six==1.11.0
urllib3==1.23
Werkzeug==0.14.1

1
runtime.txt Normal file
View file

@ -0,0 +1 @@
python-3.6.6