I am building a React Native app that records audio on an Android device through a native module, streams the audio chunks to JavaScript, and then sends them to the Google Speech API for transcription. However, I am not able to retrieve any transcription text: the API does respond, but with status 429 and no transcription data.
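The only detail I can see is the status code itself. To get more information, I am planning to log the full error body on failed requests, roughly like this (a minimal sketch, assuming axios's usual error.response shape; logApiError is just a hypothetical helper name):

// Hypothetical debugging helper: dumps the JSON error body that Google
// returns alongside the HTTP status, if axios received a response at all.
const logApiError = (error) => {
  if (error.response) {
    console.log('Status:', error.response.status);
    console.log('Body:', JSON.stringify(error.response.data, null, 2));
  } else {
    console.log('Request failed before any response:', error.message);
  }
};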
Here’s the code I’m using in my React Native app:
React Native Component (SpeechMode.js):
import React, { useEffect, useState } from 'react';
import { NativeEventEmitter, NativeModules, View, Text, StyleSheet, Button } from 'react-native';
import axios from 'axios';
const { AudioInputModule } = NativeModules;
const audioEventEmitter = new NativeEventEmitter(AudioInputModule);
const SpeechMode = () => {
  const [isRecording, setIsRecording] = useState(false);
  const [transcription, setTranscription] = useState('');

  useEffect(() => {
    const subscription = audioEventEmitter.addListener('AudioData', (data) => {
      processAudioChunk(data);
    });
    return () => {
      AudioInputModule.stopAudioStream();
      subscription.remove();
    };
  }, []);

  const startRecording = () => {
    setIsRecording(true);
    setTranscription(''); // Clear previous transcription
    AudioInputModule.startAudioStream();
  };

  const stopRecording = () => {
    setIsRecording(false);
    AudioInputModule.stopAudioStream();
  };

  const processAudioChunk = async (base64Data) => {
    try {
      const transcriptionResult = await transcribeAudio(base64Data);
      if (transcriptionResult) {
        setTranscription((prev) => prev + ' ' + transcriptionResult);
      }
    } catch (error) {
      console.error('Error in transcription:', error);
    }
  };

  const transcribeAudio = async (base64Data) => {
    const GOOGLE_API_KEY = 'api key';
    const url = `https://speech.googleapis.com/v1/speech:recognize?key=${GOOGLE_API_KEY}`;
    const requestBody = {
      config: {
        encoding: 'LINEAR16',
        sampleRateHertz: 16000,
        languageCode: 'en-US',
      },
      audio: {
        content: base64Data,
      },
    };
    try {
      const response = await axios.post(url, requestBody);
      if (response.data && response.data.results) {
        return response.data.results
          .map((result) => result.alternatives[0].transcript)
          .join(' ');
      }
    } catch (error) {
      console.error('Google API Error:', error.response || error.message);
    }
    return '';
  };

  return (
    <View style={styles.container}>
      <Text style={styles.title}>Real-Time Speech-to-Text</Text>
      <Button
        title={isRecording ? 'Stop Recording' : 'Start Recording'}
        onPress={isRecording ? stopRecording : startRecording}
      />
      <Text style={styles.subtitle}>
        {isRecording ? 'Listening...' : 'Ready to Record'}
      </Text>
      <View style={styles.outputContainer}>
        <Text style={styles.outputLabel}>Transcription:</Text>
        <Text style={styles.output}>{transcription}</Text>
      </View>
    </View>
  );
};
export default SpeechMode;
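One thing I want to rule out on the success path: as far as I understand, when the recognizer does not detect any speech the response body simply has no results field, so transcribeAudio() returns an empty string without logging anything. A small guard I am thinking of adding inside the try block (just a sketch, assuming axios's response.data shape):

// Surface "request succeeded but nothing was recognized" while debugging.
if (response.data && !response.data.results) {
  console.log('Recognize returned no results:', JSON.stringify(response.data));
}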
Native Module Code:
AudioInputModule.java (Java Code):
package com.webrtcexample;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.util.Base64;
import com.facebook.react.bridge.ReactApplicationContext;
import com.facebook.react.bridge.ReactContextBaseJavaModule;
import com.facebook.react.bridge.ReactMethod;
import com.facebook.react.modules.core.DeviceEventManagerModule;
public class AudioInputModule extends ReactContextBaseJavaModule {
    private static final int SAMPLE_RATE = 16000;
    private boolean isRecording = false;
    private Thread recordingThread;

    public AudioInputModule(ReactApplicationContext reactContext) {
        super(reactContext);
    }

    @Override
    public String getName() {
        return "AudioInputModule";
    }

    @ReactMethod
    public void startAudioStream() {
        if (isRecording) return;
        isRecording = true;
        recordingThread = new Thread(() -> {
            AudioRecord audioRecord = new AudioRecord(
                MediaRecorder.AudioSource.MIC,
                SAMPLE_RATE,
                AudioFormat.CHANNEL_IN_MONO,
                AudioFormat.ENCODING_PCM_16BIT,
                AudioRecord.getMinBufferSize(
                    SAMPLE_RATE,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT
                )
            );
            if (audioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
                sendEvent("onError", "AudioRecord initialization failed");
                return;
            }
            audioRecord.startRecording();
            byte[] buffer = new byte[2048];
            while (isRecording) {
                int read = audioRecord.read(buffer, 0, buffer.length);
                if (read > 0) {
                    // Convert PCM data to Base64 and send to React Native
                    String base64Audio = Base64.encodeToString(buffer, 0, read, Base64.NO_WRAP);
                    sendEvent("AudioData", base64Audio);
                }
            }
            audioRecord.stop();
            audioRecord.release();
        });
        recordingThread.start();
    }

    @ReactMethod
    public void stopAudioStream() {
        isRecording = false;
        if (recordingThread != null) {
            try {
                recordingThread.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            recordingThread = null;
        }
    }

    private void sendEvent(String eventName, Object data) {
        getReactApplicationContext()
            .getJSModule(DeviceEventManagerModule.RCTDeviceEventEmitter.class)
            .emit(eventName, data);
    }
}
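For context on the request volume: the native module reads into a 2048-byte buffer, and at 16 kHz, 16-bit mono PCM that is only about 64 ms of audio per AudioData event, so processAudioChunk() ends up posting one recognize request per chunk. A rough back-of-the-envelope calculation using the values from the code above:

// Rough numbers based on SAMPLE_RATE = 16000, 16-bit mono PCM, 2048-byte chunks.
const bytesPerSecond = 16000 * 2;              // 32000 bytes of PCM per second
const secondsPerChunk = 2048 / bytesPerSecond; // ~0.064 s of audio per chunk
const requestsPerSecond = 1 / secondsPerChunk; // ~15-16 recognize calls per second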
I suspect the problem is with the audio input I am capturing and how it should be sent. I know the API has a request limit, but even before hitting that limit I was not getting any transcription text back.
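In case it clarifies what I mean by the audio input: one option I am considering is to collect the raw PCM bytes on the JavaScript side and send a single recognize request when recording stops, instead of one request per chunk. This is only a sketch, not something I have verified, and it assumes the 'buffer' polyfill package is available in React Native:

import { Buffer } from 'buffer'; // assumption: the 'buffer' polyfill is installed

let pcmChunks = [];

// Would replace the direct processAudioChunk() call in the 'AudioData' listener:
// decode each Base64 chunk back to raw PCM bytes and keep it in memory.
const collectAudioChunk = (base64Data) => {
  pcmChunks.push(Buffer.from(base64Data, 'base64'));
};

// Would be called from stopRecording(): join all PCM bytes, re-encode once,
// and reuse the existing transcribeAudio() for a single request.
const flushAndTranscribe = async () => {
  const fullAudio = Buffer.concat(pcmChunks).toString('base64');
  pcmChunks = [];
  return transcribeAudio(fullAudio);
};

Is that the right direction, or is the actual problem somewhere else?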