I have a simple WebSocket project built with FastAPI, shown in the following code:
import uvicorn
from fastapi import FastAPI, WebSocket
from fastapi.responses import HTMLResponse
import numpy as np
import soundfile as sf
app = FastAPI()
html = """
<!DOCTYPE html>
<html>
<body>
<h1>Transcribe Audio With FastAPI</h1>
<p id="status">Connection status will go here</p>
<p id="transcript"></p>
<script>
navigator.mediaDevices.getUserMedia({ audio: { sampleSize: 16, channelCount: 1, sampleRate: 16000 } }).then((stream) => {
if (!MediaRecorder.isTypeSupported('audio/webm'))
return alert('Browser not supported')
const mediaRecorder = new MediaRecorder(stream, {
mimeType: 'audio/webm',
})
const socket = new WebSocket('ws://localhost:8000/listen')
socket.onopen = () => {
document.querySelector('#status').textContent = 'Connected'
console.log({ event: 'onopen' })
mediaRecorder.addEventListener('dataavailable', async (event) => {
if (event.data.size > 0 && socket.readyState == 1) {
socket.send(event.data)
}
})
mediaRecorder.start(250)
}
socket.onmessage = (message) => {
const received = message.data
if (received) {
console.log(received)
document.querySelector('#transcript').textContent += ' ' + received
}
}
socket.onclose = () => {
console.log({ event: 'onclose' })
}
socket.onerror = (error) => {
console.log({ event: 'onerror', error })
}
})
</script>
</body>
</html>
"""
@app.get("/")
async def get():
return HTMLResponse(html)
@app.websocket("/listen")
async def websocket_endpoint(websocket: WebSocket):
await websocket.accept()
try:
while True:
data = await websocket.receive_bytes()
print(data)
# Convert data to numpy array
# rest of the process!
except Exception as e:
raise Exception(f'Could not process audio: {e}')
finally:
await websocket.close()
if __name__ == '__main__':
uvicorn.run(app)
After running the project, I want to convert the received data bytes to a NumPy array.
What I have tried:
1)
def tensorize(x):
    arr = np.frombuffer(x, dtype=np.float32)
    # copy to avoid the read-only buffer warning
    arr = np.copy(arr)
    return arr


@app.websocket("/listen")
async def websocket_endpoint(websocket: WebSocket):
    print("I'm here websocket_endpoint")
    await websocket.accept()
    try:
        # deepgram_socket = await process_audio(websocket)
        whole = []
        counter = 0
        while True:
            data = await websocket.receive_bytes()
            array = tensorize(data)
    except Exception as e:
        raise Exception(f'Could not process audio: {e}')
    finally:
        await websocket.close()
which raises the following error:
arr = np.frombuffer(x, dtype=np.float32)
ValueError: buffer size must be a multiple of element size
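I assume the ValueError happens because each received chunk's byte length is not a multiple of 4 (the float32 item size). A quick diagnostic that could be dropped into the handler, as a sketch (1a45dfa3 is the EBML magic that WebM/Matroska data starts with):

data = await websocket.receive_bytes()
# a non-zero remainder means the payload is not a whole number of float32 samples
print(len(data) % 4)
# seeing '1a45dfa3' on the first chunk would mean the bytes are a WebM/EBML
# container produced by MediaRecorder, not raw audio samples
print(data[:4].hex())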
@app.websocket("/listen")
async def websocket_endpoint(websocket: WebSocket):
print("I'm here websocket_endpoint")
await websocket.accept()
try:
# deepgram_socket = await process_audio(websocket)
whole = []
counter = 0
while True:
data = await websocket.receive_bytes()
data_s16 = np.frombuffer(data, dtype=np.int16, count=len(data) // 2, offset=0)
float_data = data_s16 * 0.5 ** 15
whole.append(float_data)
print(data)
counter += 1
if counter > 20:
data = np.concatenate(whole)
sf.write('stereo_file1.wav', data, 16000, 'PCM_24')
break
print(counter)
# await websocket.send_text(f"Message text was: {data}")
# deepgram_socket.send(data)
except Exception as e:
raise Exception(f'Could not process audio: {e}')
finally:
await websocket.close()
This sample code does not raise any errors, but the output audio file does not contain any perceivable audio; only noise is saved.
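My suspicion is that MediaRecorder with mimeType 'audio/webm' emits WebM/Opus container chunks rather than raw PCM, so reading the bytes as int16 samples can only yield noise. Would decoding the stream first be the right approach? A minimal sketch of what I have in mind, assuming ffmpeg is installed on the server (decode_webm is my own hypothetical helper that buffers the whole stream and decodes it in one shot):

import subprocess

import numpy as np


def decode_webm(webm_bytes: bytes, sample_rate: int = 16000) -> np.ndarray:
    # feed the accumulated WebM bytes to ffmpeg on stdin and read back
    # mono 16 kHz signed 16-bit little-endian PCM from stdout
    proc = subprocess.run(
        ["ffmpeg", "-i", "pipe:0",
         "-f", "s16le", "-acodec", "pcm_s16le",
         "-ac", "1", "-ar", str(sample_rate),
         "pipe:1"],
        input=webm_bytes,
        capture_output=True,
        check=True,
    )
    pcm_s16 = np.frombuffer(proc.stdout, dtype=np.int16)
    # scale to float32 in [-1, 1) for further processing
    return pcm_s16.astype(np.float32) / 32768.0

The endpoint would then concatenate the raw chunks into one bytes object and call decode_webm on it before writing the result with sf.write. Is this the right way to recover the samples, or is there a way to make the browser send raw PCM directly?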