Issue with audio streaming in a Flask-SocketIO application

Below is my user1_interface.html/user2_interface.html code. With this version I am able to hear the audio, but the audio button depends on the video: I can only turn the audio on after the video has been turned on.
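
For reference, the coupling comes from startCamera() in the listing below: both tracks are requested in a single getUserMedia call, so there is no microphone track for the mute button to act on until the camera has been started. A simplified excerpt (error handling and the offer/answer signalling omitted):

async function startCamera() {
    // Video and audio are requested together, so an audio track only exists once the camera is on
    localStream = await navigator.mediaDevices.getUserMedia({ video: true, audio: true });
    createPeerConnection();
    localStream.getTracks().forEach(track => peerConnection.addTrack(track, localStream));
    localStream.getAudioTracks()[0].enabled = !isMuted;  // the shared stream's audio starts muted
}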

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{{ name | capitalize }}</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.4.1/socket.io.min.js"></script>
    <style>
        .video-button {
            position: absolute;
            top: 20px;
            right: 110px;
            background-color: transparent;
            border: none;
            cursor: pointer;
        }

        .video-button img {
            width: 40px;
            height: 40px;
        }

        .remote-video-style {
            position: fixed;
            bottom: -12px;
            right: 20px;
            width: 180px;
            height: 180px;
            z-index: 1000;
        }

        .mute-button {
            position: absolute;
            top: 20px;
            right: 160px;
            background-color: transparent;
            border: none;
            cursor: pointer;
        }

        .mute-button img {
            width: 35px;
            height: 35px;
        }
    </style>
</head>
<body>
    <h2>{{ name | capitalize }}</h2>

    <video id="remoteVideo" autoplay playsinline></video>
    
    <button id="cameraButton" onclick="toggleCamera()" class="video-button">
        <img id="camera-icon" src="{{ url_for('static', filename='vidoff.png') }}" alt="Camera On"
        data-show="{{ url_for('static', filename='vidon.png') }}"
        data-hide="{{ url_for('static', filename='vidoff.png') }}">
    </button>

    <button id="mute-button" class="mute-button">
        <img id="mute-icon" src="{{ url_for('static', filename='mute.png') }}" alt="Mute" 
        data-show="{{ url_for('static', filename='unmute.png') }}"
        data-hide="{{ url_for('static', filename='mute.png') }}">
    </button>

    <script>
        const socket = io();
        const remoteVideo = document.getElementById("remoteVideo");
        const cameraButton = document.getElementById("cameraButton");
        const cameraIcon = document.getElementById("camera-icon");
        const muteButton = document.getElementById("mute-button");
        const muteIcon = document.getElementById("mute-icon");
        let localStream = null;
        let peerConnection = null;
        let isCameraOn = false;
        let isMuted = true;  // Initially muted

        async function toggleCamera() {
            if (isCameraOn) {
                stopCamera();
            } else {
                startCamera();
            }
        }

        async function startCamera() {
            try {
                // Access both video and audio
                localStream = await navigator.mediaDevices.getUserMedia({ video: true, audio: true });
                cameraIcon.src = cameraIcon.getAttribute('data-show');
                isCameraOn = true;

                createPeerConnection();
                localStream.getTracks().forEach(track => peerConnection.addTrack(track, localStream));

                // Initially mute audio
                if (localStream.getAudioTracks().length > 0) {
                    localStream.getAudioTracks()[0].enabled = !isMuted;
                }

                // Create an offer and send it to the other user
                const offer = await peerConnection.createOffer();
                await peerConnection.setLocalDescription(offer);
                socket.emit('offer', { type: 'offer', sdp: offer.sdp });
            } catch (error) {
                console.error("Error accessing camera and microphone:", error);
            }
        }

        function stopCamera() {
            if (localStream) {
                localStream.getTracks().forEach(track => track.stop());
                localStream = null;
            }
            if (peerConnection) {
                peerConnection.close();
                peerConnection = null;
            }

            cameraIcon.src = cameraIcon.getAttribute('data-hide');
            isCameraOn = false;
            remoteVideo.srcObject = null;
            remoteVideo.classList.remove("remote-video-style");
            socket.emit('offer', { type: 'offer', sdp: null });
        }

        function createPeerConnection() {
            peerConnection = new RTCPeerConnection();

            // Handle incoming remote track
            peerConnection.ontrack = (event) => {
                if (event.streams && event.streams[0]) {
                    remoteVideo.srcObject = event.streams[0];
                    console.log("Received remote stream:", event.streams[0]);
                } else {
                    console.warn("No streams in ontrack event.");
                }
                remoteVideo.classList.add("remote-video-style");
            };

            // Handle ICE candidates
            peerConnection.onicecandidate = (event) => {
                if (event.candidate) {
                    socket.emit('candidate', { candidate: event.candidate });
                }
            };
        }

        // Function to toggle Mute/Unmute
        muteButton.addEventListener("click", () => {
            if (localStream && localStream.getAudioTracks().length > 0) {
                isMuted = !isMuted;
                muteIcon.src = isMuted ? muteIcon.getAttribute('data-hide') : muteIcon.getAttribute('data-show');
                localStream.getAudioTracks()[0].enabled = !isMuted;
                
                console.log("Audio muted:", isMuted);
                
                // Notify the other peer about mute/unmute status
                socket.emit('audio-mute', { isMuted });
            }
        });

        // Socket event listeners for signaling
        socket.on("offer", async (data) => {
            if (data.sdp) {
                if (!peerConnection) createPeerConnection();
                await peerConnection.setRemoteDescription(new RTCSessionDescription({ type: "offer", sdp: data.sdp }));
                const answer = await peerConnection.createAnswer();
                await peerConnection.setLocalDescription(answer);
                socket.emit("answer", { type: "answer", sdp: answer.sdp });
            } else {
                if (peerConnection) {
                    peerConnection.close();
                    peerConnection = null;
                }
                remoteVideo.srcObject = null;
                remoteVideo.classList.remove("remote-video-style");
            }
        });

        socket.on("answer", async (data) => {
            if (peerConnection) {
                await peerConnection.setRemoteDescription(new RTCSessionDescription({ type: "answer", sdp: data.sdp }));
            }
        });

        socket.on("candidate", async (data) => {
            if (peerConnection && data.candidate) {
                await peerConnection.addIceCandidate(new RTCIceCandidate(data.candidate));
            }
        });

        // Handle mute/unmute for remote audio
        socket.on("audio-mute", (data) => {
            if (remoteVideo.srcObject && remoteVideo.srcObject.getAudioTracks().length > 0) {
                remoteVideo.srcObject.getAudioTracks()[0].enabled = !data.isMuted;
                console.log("Remote audio muted:", data.isMuted);
            }
        });
    </script>
</body>
</html>

I have now modified the user1_interface.html/user2_interface.html code to make the audio independent of the video, but with this version I am unable to hear any audio. Below is the code with the independent audio feature.
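
The relevant change is initAudioStream() in the listing below: the microphone is now requested on its own, the first time the mute button is clicked, and its track is only attached to the peer connection if one already exists at that moment. A simplified excerpt (error handling omitted):

async function initAudioStream() {
    // The microphone is requested independently of the camera
    audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    audioStream.getAudioTracks()[0].enabled = !isMuted;  // apply the current mute state
    // The audio track is only added if a peer connection already exists at this point
    if (peerConnection && audioStream) {
        audioStream.getTracks().forEach(track => peerConnection.addTrack(track, audioStream));
    }
}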

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{{ name | capitalize }}</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.4.1/socket.io.min.js"></script>
    <style>
        .video-button {
            position: absolute;
            top: 20px;
            right: 110px;
            background-color: transparent;
            border: none;
            cursor: pointer;
        }

        .video-button img {
            width: 40px;
            height: 40px;
        }

        .remote-video-style {
            position: fixed;
            bottom: -12px;
            right: 20px;
            width: 180px;
            height: 180px;
            z-index: 1000;
        }

        .mute-button {
            position: absolute;
            top: 20px;
            right: 160px;
            background-color: transparent;
            border: none;
            cursor: pointer;
        }

        .mute-button img {
            width: 35px;
            height: 35px;
        }
    </style>
</head>
<body>
    <h2>{{ name | capitalize }}</h2>

    <video id="remoteVideo" autoplay playsinline></video>
    
    <button id="cameraButton" onclick="toggleCamera()" class="video-button">
        <img id="camera-icon" src="{{ url_for('static', filename='vidoff.png') }}" alt="Camera On"
        data-show="{{ url_for('static', filename='vidon.png') }}"
        data-hide="{{ url_for('static', filename='vidoff.png') }}">
    </button>

    <button id="mute-button" class="mute-button">
        <img id="mute-icon" src="{{ url_for('static', filename='mute.png') }}" alt="Mute" 
        data-show="{{ url_for('static', filename='unmute.png') }}"
        data-hide="{{ url_for('static', filename='mute.png') }}">
    </button>

    <script>
        const socket = io();
        const remoteVideo = document.getElementById("remoteVideo");
        const cameraButton = document.getElementById("cameraButton");
        const cameraIcon = document.getElementById("camera-icon");
        const muteButton = document.getElementById("mute-button");
        const muteIcon = document.getElementById("mute-icon");
        
        let localStream = null;
        let audioStream = null;  // Separate audio stream
        let peerConnection = null;
        let isCameraOn = false;
        let isMuted = true;  // Initially muted

        // Function to initialize audio stream (mic only)
        async function initAudioStream() {
            try {
                audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
                audioStream.getAudioTracks()[0].enabled = !isMuted;  // Set initial mute state
                console.log("Audio stream initialized:", audioStream);
                // Add audio track to the peer connection if available
                if (peerConnection && audioStream) {
                    audioStream.getTracks().forEach(track => peerConnection.addTrack(track, audioStream));
                }
            } catch (error) {
                console.error("Error accessing microphone:", error);
            }
        }

        // Function to toggle Mute/Unmute
        muteButton.addEventListener("click", () => {
            if (!audioStream) {
                // Initialize audio stream if not already done
                initAudioStream().then(() => {
                    toggleAudio();
                });
            } else {
                toggleAudio();
            }
        });

        function toggleAudio() {
            isMuted = !isMuted;
            muteIcon.src = isMuted ? muteIcon.getAttribute('data-hide') : muteIcon.getAttribute('data-show');
            if (audioStream && audioStream.getAudioTracks().length > 0) {
                audioStream.getAudioTracks()[0].enabled = !isMuted;
                console.log("Audio muted:", isMuted);
                socket.emit('audio-mute', { isMuted });
            }
        }

        // Function to stop the audio stream completely
        function stopAudioStream() {
            if (audioStream) {
                audioStream.getTracks().forEach(track => track.stop());
                audioStream = null;
            }
        }

        // Function to toggle camera on/off
        async function toggleCamera() {
            if (isCameraOn) {
                stopCamera();
            } else {
                startCamera();
            }
        }

        async function startCamera() {
            try {
                // Access video (audio already accessed separately in initAudioStream)
                localStream = await navigator.mediaDevices.getUserMedia({ video: true });
                cameraIcon.src = cameraIcon.getAttribute('data-show');
                isCameraOn = true;

                createPeerConnection();

                // Add each video track to the peer connection
                localStream.getTracks().forEach(track => peerConnection.addTrack(track, localStream));

                // Send an offer to the other peer
                const offer = await peerConnection.createOffer();
                await peerConnection.setLocalDescription(offer);
                socket.emit('offer', { type: 'offer', sdp: offer.sdp });
            } catch (error) {
                console.error("Error accessing camera:", error);
            }
        }

        function stopCamera() {
            if (localStream) {
                localStream.getTracks().forEach(track => track.stop());
                localStream = null;
            }
            if (peerConnection) {
                peerConnection.close();
                peerConnection = null;
            }

            cameraIcon.src = cameraIcon.getAttribute('data-hide');
            isCameraOn = false;
            remoteVideo.srcObject = null;
            remoteVideo.classList.remove("remote-video-style");
            socket.emit('offer', { type: 'offer', sdp: null });
        }

        function createPeerConnection() {
            peerConnection = new RTCPeerConnection();

            // Handle incoming remote track
            peerConnection.ontrack = (event) => {
                if (event.streams && event.streams[0]) {
                    remoteVideo.srcObject = event.streams[0];
                    console.log("Received remote stream:", event.streams[0]);
                } else {
                    console.warn("No streams in ontrack event.");
                }
                remoteVideo.classList.add("remote-video-style");
            };

            // Handle ICE candidates
            peerConnection.onicecandidate = (event) => {
                if (event.candidate) {
                    socket.emit('candidate', { candidate: event.candidate });
                }
            };

            // Add audio stream independently of video
            if (audioStream) {
                audioStream.getTracks().forEach(track => peerConnection.addTrack(track, audioStream));
            }
        }

        // Socket event listeners for signaling
        socket.on("offer", async (data) => {
            if (data.sdp) {
                if (!peerConnection) createPeerConnection();
                await peerConnection.setRemoteDescription(new RTCSessionDescription({ type: "offer", sdp: data.sdp }));
                const answer = await peerConnection.createAnswer();
                await peerConnection.setLocalDescription(answer);
                socket.emit("answer", { type: "answer", sdp: answer.sdp });
            } else {
                if (peerConnection) {
                    peerConnection.close();
                    peerConnection = null;
                }
                remoteVideo.srcObject = null;
                remoteVideo.classList.remove("remote-video-style");
            }
        });

        socket.on("answer", async (data) => {
            if (peerConnection) {
                await peerConnection.setRemoteDescription(new RTCSessionDescription({ type: "answer", sdp: data.sdp }));
            }
        });

        socket.on("candidate", async (data) => {
            if (peerConnection && data.candidate) {
                await peerConnection.addIceCandidate(new RTCIceCandidate(data.candidate));
            }
        });

        // Handle mute/unmute for remote audio
        socket.on("audio-mute", (data) => {
            if (remoteVideo.srcObject && remoteVideo.srcObject.getAudioTracks().length > 0) {
                remoteVideo.srcObject.getAudioTracks()[0].enabled = !data.isMuted;
                console.log("Remote audio muted:", data.isMuted);
            }
        });
    </script>
</body>
</html>

Below is the app.py code I am using:

from flask import Flask, render_template, request, redirect, url_for, abort
from flask_socketio import SocketIO, emit

app = Flask(__name__)
socketio = SocketIO(app)

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/candidate', methods = ['GET'])
def candidateLogin():
    return render_template('user1.html')

@app.route('/interviewer', methods = ['GET'])
def interviewerLogin():
    return render_template('user2.html')

@app.route('/candidate_interface')
def candidateInterface():
    # Pass the name through so the template's {{ name }} renders
    name = request.args.get('name')
    return render_template('user1_interface.html', name=name)

@app.route('/interviewer_interface')
def interviewerInterface():
    name = request.args.get('name')
    return render_template('user2_interface.html', name=name)

@app.route('/candidate_signin', methods = ['POST'])
def candidateSignin():
    name = request.args.get('name')
    print(name)
    # Forward the name to the interface route so it reaches the template
    return redirect(url_for('candidateInterface', name=name))

@app.route('/interviewe_signin', methods = ['POST'])
def intervieweSignin():
    name = request.args.get('name')
    print(name)
    return redirect(url_for('interviewerInterface', name=name))

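# Each handler below relays the signaling message to every other connected client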
@socketio.on('offer')
def handle_offer(data):
    print("offer: ", data, 'n')
    emit('offer', data, broadcast=True, include_self=False)

@socketio.on('answer')
def handle_answer(data):
    print("answer: ", data, 'n')
    emit('answer', data, broadcast=True, include_self=False)

@socketio.on('candidate')
def handle_candidate(data):
    print("candidate: ", data, 'n')
    emit('candidate', data, broadcast=True, include_self=False)

@socketio.on('audio-mute')
def handle_audio_mute(data):
    print("audio-mute:", data, 'n')
    emit('audio-mute', data, broadcast=True, include_self=False)

if __name__ == '__main__':
    socketio.run(app, debug=True)

As I am very new to this, I am unable to work out where I am going wrong. Thanks in advance for any suggestions.

I have tried