I'm working on a project that does real-time audio analysis. For the first chunk sent after pressing the Start button, the server always just logs "Final transcription:" (i.e., an empty transcription), but for every subsequent chunk it logs "No speech detected in audio". If I press the Stop button and then Start again, the same thing happens. I'm using MediaRecorder.jsx to record the user's voice on another page, where it works fine. I tried to fix this with AI, but the code only got more complicated. Please help me.
My code, in this order: the Visualizer component where the problem shows up, the useMediaRecorder hook (MediaRecorder.jsx), and the Express server that calls the Google Cloud Speech-to-Text API.
import { useState, useEffect, useRef } from "react";
import { LiveAudioVisualizer } from "react-audio-visualize";
import { useMediaRecorder } from "./MediaRecorder";

const Visualizer = () => {
  const {
    mediaRecorder,
    isRecording,
    startRecording,
    stopRecording,
    recordedChunks,
    resetRecording,
  } = useMediaRecorder();

  const [isSpeaking, setIsSpeaking] = useState(false);
  const [transcription, setTranscription] = useState("");
  const [accumulatedChunks, setAccumulatedChunks] = useState([]);
  const transcriptionRef = useRef("");
  const lastChunkRef = useRef(null);
  const resetIntervalRef = useRef(null);
  const lastSentTimestamp = useRef(Date.now());

  useEffect(() => {
    if (mediaRecorder && mediaRecorder.state === "recording") {
      setIsSpeaking(true);
    } else {
      setIsSpeaking(false);
    }
  }, [mediaRecorder]);

  // Reset recording every 30 seconds
  useEffect(() => {
    if (isRecording) {
      resetIntervalRef.current = setInterval(() => {
        resetRecording();
      }, 30000);
    }
    return () => {
      if (resetIntervalRef.current) {
        clearInterval(resetIntervalRef.current);
      }
    };
  }, [isRecording, resetRecording]);

  // Handle new chunks
  useEffect(() => {
    if (recordedChunks.length > 0) {
      const lastChunk = recordedChunks[recordedChunks.length - 1];
      if (lastChunk !== lastChunkRef.current && lastChunk.size > 0) {
        console.log("New chunk received:", {
          size: lastChunk.size,
          time: new Date().toISOString(),
          totalChunks: accumulatedChunks.length,
        });
        setAccumulatedChunks((prev) => [...prev, lastChunk]);
        lastChunkRef.current = lastChunk;
      }
    }
  }, [recordedChunks]);

  // Handle transcription
  useEffect(() => {
    const sendAudioForTranscription = async () => {
      if (accumulatedChunks.length > 0) {
        const audioBlob = new Blob(accumulatedChunks, {
          type: "audio/webm;codecs=opus",
        });
        if (audioBlob.size === 0) {
          console.log("Empty audio blob, skipping");
          return;
        }
        const currentTimestamp = Date.now();
        if (currentTimestamp - lastSentTimestamp.current < 2000) {
          console.log("Skipping request due to rate limit");
          return;
        }
        lastSentTimestamp.current = currentTimestamp;
        const formData = new FormData();
        formData.append("audio", audioBlob);
        try {
          const response = await fetch("http://localhost:3000/transcribe", {
            method: "POST",
            body: formData,
          });
          const data = await response.json();
          if (data.transcription) {
            transcriptionRef.current += " " + data.transcription;
            setTranscription(transcriptionRef.current.trim());
            setAccumulatedChunks([]); // Clear only after successful transcription
          }
        } catch (error) {
          console.error("Transcription error:", error);
        }
      }
    };
    const transcriptionInterval = setInterval(() => {
      sendAudioForTranscription();
    }, 2000);
    return () => clearInterval(transcriptionInterval);
  }, [accumulatedChunks]);

  return (
    <div className="container mt-5 poppins-regular">
      <div className="row justify-content-center mb-4">
        <div className="col-auto">
          <button
            className="btn btn-primary me-2"
            onClick={startRecording}
            disabled={isRecording}
          >
            Start Recording
          </button>
          <button
            className="btn btn-danger"
            onClick={stopRecording}
            disabled={!isRecording}
          >
            Stop Recording
          </button>
        </div>
      </div>
      {/* Transcription Display */}
      {transcription && (
        <div className="row justify-content-center mb-4">
          <div className="col-auto">
            <div className="card p-3">
              <h5 className="card-title">Transcription:</h5>
              <p className="card-text">{transcription}</p>
            </div>
          </div>
        </div>
      )}
      {/* Speaker Status Display */}
      {isRecording && (
        <div className="row justify-content-center mb-4">
          <div className="col-auto">
            <p className="mb-1">
              Status:{" "}
              <strong>{isSpeaking ? "Person 1 is speaking" : "Silence"}</strong>
            </p>
            <p className="mb-1">
              Subject: <strong>Car</strong>
            </p>
            <p className="mb-1">
              Emotional State: <strong>Happy</strong>
            </p>
          </div>
        </div>
      )}
      {/* Live audio visualization */}
      {mediaRecorder && (
        <div className="row justify-content-center">
          <div className="col-auto">
            <LiveAudioVisualizer
              mediaRecorder={mediaRecorder}
              width={500}
              height={200}
              barColor="rgba(2, 21, 86, 1)"
              minDecibels={-85}
              smoothingTimeConstant={0.3}
            />
          </div>
        </div>
      )}
    </div>
  );
};
export default Visualizer;
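
If it helps with debugging, this is the kind of helper I could call right before the fetch in sendAudioForTranscription to see (and listen to) exactly what each request uploads. debugUpload is hypothetical and not part of my code yet; blob and chunks would be the same audioBlob and accumulatedChunks used above.

// Hypothetical debug helper: inspect and save the exact payload of one upload.
const debugUpload = (blob, chunks) => {
  console.log("Uploading blob:", {
    size: blob.size,
    type: blob.type,
    chunkCount: chunks.length,
    chunkSizes: chunks.map((c) => c.size),
  });
  // Trigger a download of the payload so it can be played back locally.
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = `upload-${Date.now()}.webm`;
  a.click();
  URL.revokeObjectURL(url);
};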
MediaRecorder.jsx:

import React, { useState, useRef, useCallback, useEffect } from "react";

const useMediaRecorder = () => {
  const [isRecording, setIsRecording] = useState(false);
  const [recordedChunks, setRecordedChunks] = useState([]);
  const mediaRecorderRef = useRef(null);
  const streamRef = useRef(null);

  const setupRecorder = async () => {
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        sampleRate: 48000,
      },
    });
    streamRef.current = stream;
    const recorder = new MediaRecorder(stream, {
      mimeType: "audio/webm;codecs=opus",
      bitsPerSecond: 128000,
    });
    recorder.ondataavailable = (event) => {
      if (event.data && event.data.size > 0) {
        setRecordedChunks((prev) => [...prev, event.data]);
      }
    };
    return recorder;
  };

  const resetRecording = useCallback(async () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      streamRef.current?.getTracks().forEach((track) => track.stop());
      setRecordedChunks([]);
      const newRecorder = await setupRecorder();
      mediaRecorderRef.current = newRecorder;
      newRecorder.start(2000);
    }
  }, []);

  const startRecording = async () => {
    try {
      const recorder = await setupRecorder();
      mediaRecorderRef.current = recorder;
      recorder.start(2000);
      setIsRecording(true);
    } catch (err) {
      console.error("MediaRecorder error:", err);
    }
  };

  const stopRecording = useCallback(() => {
    if (mediaRecorderRef.current && isRecording) {
      mediaRecorderRef.current.stop();
      streamRef.current?.getTracks().forEach((track) => track.stop());
      setIsRecording(false);
      setRecordedChunks([]);
    }
  }, [isRecording]);

  return {
    isRecording,
    startRecording,
    stopRecording,
    recordedChunks,
    mediaRecorder: mediaRecorderRef.current,
    resetRecording,
  };
};

export { useMediaRecorder };
The Express server:

const express = require("express");
const speech = require("@google-cloud/speech");
const multer = require("multer");
const path = require("path");
const cors = require("cors");

const app = express();
const upload = multer();
app.use(cors());

const credentials = require(path.join(__dirname, "../cred.json"));
const speechClient = new speech.SpeechClient({ credentials });

app.post("/transcribe", upload.single("audio"), async (req, res) => {
  try {
    if (!req.file) {
      return res.status(400).json({ error: "No audio file provided" });
    }
    console.log("Received audio size:", req.file.buffer.length);
    // Smaller buffer size limit
    // if (req.file.buffer.length > 100000) {
    //   return res.json({ transcription: "too big" });
    // }
    const audioBytes = req.file.buffer.toString("base64");
    const config = {
      encoding: "WEBM_OPUS",
      sampleRateHertz: 48000,
      languageCode: "en-US",
      model: "default",
      enableAutomaticPunctuation: true,
      useEnhanced: true,
      enableWordConfidence: true,
      enableWordTimeOffsets: true,
      maxAlternatives: 1,
    };
    const audio = {
      content: audioBytes,
    };
    const request = {
      config,
      audio,
    };
    console.log("Sending request to Google Speech API");
    const [response] = await speechClient.recognize(request);
    console.log("Raw response:", response);
    if (!response.results || response.results.length === 0) {
      console.log("No speech detected in audio");
      return res.json({ transcription: "" });
    }
    let transcription = "";
    response.results.forEach((result) => {
      const alternative = result.alternatives[0];
      if (alternative.confidence > 0.8) {
        transcription += alternative.transcript + " ";
      }
    });
    console.log("Final transcription:", transcription.trim());
    return res.json({ transcription: transcription.trim() });
  } catch (error) {
    console.error("API Error:", error);
    return res.status(500).json({ error: error.message });
  }
});

module.exports = app;

const PORT = 3000;
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});
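
For completeness, this is roughly how I plan to test the /transcribe endpoint on its own, outside the React app. It is only a sketch: it assumes Node 18+ (for the global fetch, FormData, and Blob) and a known-good test.webm saved from the page where MediaRecorder.jsx works fine.

// test-transcribe.js — hypothetical standalone check of POST /transcribe.
const fs = require("fs");

async function main() {
  const buffer = fs.readFileSync("test.webm"); // a recording saved from the working page
  const formData = new FormData();
  formData.append(
    "audio",
    new Blob([buffer], { type: "audio/webm;codecs=opus" }),
    "test.webm"
  );
  const response = await fetch("http://localhost:3000/transcribe", {
    method: "POST",
    body: formData,
  });
  console.log(await response.json());
}

main().catch(console.error);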



