Make it run in production.

This commit is contained in:
Katharine Berry 2018-06-30 15:20:01 -07:00
parent 054b77cfd7
commit 0d840bbb73
4 changed files with 63 additions and 38 deletions

1
Procfile Normal file
View file

@ -0,0 +1 @@
web: gunicorn -k gevent -b 0.0.0.0:$PORT asr:app

View file

@ -1,14 +1,20 @@
import gevent.monkey
gevent.monkey.patch_all()
import base64
from email.mime.multipart import MIMEMultipart
from email.message import Message
import json
import struct
import os
from flask import Flask, request, Response
from google.cloud import speech
import requests
from flask import Flask, request, Response, abort
app = Flask(__name__)
AUTH_URL = "https://auth.rebble.io"
API_KEY = os.environ['SPEECH_API_KEY']
# We know gunicorn does this, but it doesn't *say* it does this, so we must signal it manually.
@app.before_request
@ -18,7 +24,6 @@ def handle_chunking():
def parse_chunks(stream):
boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip() # super lazy/brittle parsing.
print("Boundary: " + boundary.decode('utf-8'))
this_frame = b''
while True:
content = stream.read(4096)
@ -28,7 +33,6 @@ def parse_chunks(stream):
frame = this_frame[:end]
if frame != b'':
header, content = frame.split(b'\r\n\r\n', 1)
print(content)
yield content[:-2]
this_frame = this_frame[end + len(boundary):]
if content == b'':
@ -36,44 +40,45 @@ def parse_chunks(stream):
break
def parse_data():
    """Yield the payload of each part of the buffered multipart request body.

    The boundary is taken from the second ``;``-separated parameter of the
    request's Content-Type header, and the whole body is read from
    ``request.data`` (Flask's request global).

    Yields:
        bytes: the content of each part that has a header/body separator,
        with the final two bytes (the CRLF that precedes the next boundary
        delimiter) removed.
    """
    # super lazy/brittle parsing.
    boundary = b'--' + request.headers['content-type'].split(';')[1].split('=')[1].encode('utf-8').strip()
    for part in request.data.split(boundary):
        _headers, separator, payload = part.partition(b'\r\n\r\n')
        if not separator:
            # No blank line separating headers from content: this is the empty
            # preamble or the "--" left over from the closing delimiter.
            # Indexing [1] on such a part used to raise IndexError.
            continue
        yield payload[:-2]
@app.route('/NmspServlet/', methods=["POST"])
def recognise():
client = speech.SpeechClient()
stream = request.stream
access_token, part1, part2 = request.host.split('.', 1)[0].split('-', 3)
lang = f"{part1}-{part2.upper()}"
auth_req = requests.get(f"{AUTH_URL}/api/v1/me/token", headers={'Authorization': f"Bearer {access_token}"})
if not auth_req.ok:
abort(401)
chunks = iter(list(parse_chunks(stream)))
content = next(chunks).decode('utf-8')
print(content)
config = speech.types.RecognitionConfig(
encoding='SPEEX_WITH_HEADER_BYTE',
language_code='en-US',
sample_rate_hertz=16000,
)
print('beginning request')
responses = client.streaming_recognize(
config=speech.types.StreamingRecognitionConfig(config=config),
requests=(
speech.types.StreamingRecognizeRequest(audio_content=struct.pack('B', len(x)) + x)
for x in chunks))
print('finished request')
body = {
'config': {
'encoding': 'SPEEX_WITH_HEADER_BYTE',
'language_code': lang,
'sample_rate_hertz': 16000,
'max_alternatives': 1,
# 'metadata': {
# 'interaction_type': 'DICTATION',
# 'microphone_distance': 'NEARFIELD',
# },
},
'audio': {
'content': base64.b64encode(b''.join((struct.pack('B', len(x)) + x for x in chunks))).decode('utf-8'),
},
}
result = requests.post(f'https://speech.googleapis.com/v1/speech:recognize?key={API_KEY}', json=body)
result.raise_for_status()
words = []
for response in responses:
if response.results:
for result in response.results:
if 'results' in result.json():
for result in result.json()['results']:
words.extend({
'word': x,
'confidence': result.alternatives[0].confidence
} for x in result.alternatives[0].transcript.split(' '))
'confidence': result['alternatives'][0]['confidence']
} for x in result['alternatives'][0]['transcript'].split(' '))
# Now for some reason we also need to give back a mime/multipart message...
parts = MIMEMultipart()
@ -83,6 +88,7 @@ def recognise():
if len(words) > 0:
response_part.add_header('Content-Disposition', 'form-data; name="QueryResult"')
words[0]['word'] += '\\*no-space-before'
words[0]['word'] = words[0]['word'][0].upper() + words[0]['word'][1:]
response_part.set_payload(json.dumps({
'words': [words],
}))
@ -96,10 +102,8 @@ def recognise():
"Prompt": "Sorry, speech not recognized. Please try again."
}))
parts.attach(response_part)
print(parts.as_string())
response = Response(parts.as_string().split("\n", 3)[3])
response.headers['Content-Type'] = f'multipart/form-data; boundary={parts.get_boundary()}'
response.headers['Connection'] = 'close'
return response

19
requirements.txt Normal file
View file

@ -0,0 +1,19 @@
certifi==2018.4.16
chardet==3.0.4
click==6.7
Flask==1.0.2
gevent==1.3.4
greenlet==0.4.13
gunicorn==19.8.1
idna==2.7
itsdangerous==0.24
Jinja2==2.10
MarkupSafe==1.0
pyasn1==0.4.3
pyasn1-modules==0.2.2
pytz==2018.4
requests==2.19.1
rsa==3.4.2
six==1.11.0
urllib3==1.23
Werkzeug==0.14.1

1
runtime.txt Normal file
View file

@ -0,0 +1 @@
python-3.6.6