How to edit several audio clips into a larger audio clip for download in one audio file (HTML, JS)?

How can I mix several audio clips together to make one assembled audio file that the user can download? Think of it as audio Mad Libs. Remember Mad Libs?

Here’s what I’ve got so far (it’s not much):

// Audio element with id "mad-libs" (presumably the pre-recorded story track — confirm against the markup).
var audio = document.getElementById('mad-libs')
// Audio elements with ids '1' through '6'; only audio1 is used in the visible code,
// where it is handed to recordAndStore below.
var audio1 = document.getElementById('1')
var audio2 = document.getElementById('2')
var audio3 = document.getElementById('3')
var audio4 = document.getElementById('4')
var audio5 = document.getElementById('5')
var audio6 = document.getElementById('6')
// Element with id "record"; recordAndStore attaches a click listener to it and rewrites its label.
var record = document.getElementById('record')
// Assigned a MediaRecorder instance by recordAndStore once getUserMedia resolves.
var mediaRecorder = null
// Flag flipped by the record button's click handler: true between a start() and the matching stop().
var recording = false
// NOTE(review): never read or written in the visible code — presumably meant to collect recorded data; confirm.
var chunks = []

// Wire up the record button so its recordings are stored on the first audio element.
recordAndStore(audio1)

/**
 * Request microphone access and wire the file-level `record` button so that
 * each click toggles a recording; every finished recording becomes the `src`
 * of `slot`.
 *
 * Side effects: assigns the file-level `mediaRecorder`, toggles the file-level
 * `recording` flag, rewrites the button label, and adds one click listener to
 * `record` per call (so it is meant to be called once per button).
 *
 * @param {HTMLAudioElement} slot - element whose `src` receives the recording's object URL
 * @returns {Promise<void>} settles once the button is wired, or once the failure has been logged
 */
function recordAndStore (slot) {
  // navigator.mediaDevices is undefined in insecure contexts; without this
  // guard the call below throws synchronously instead of being logged.
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    console.log(new Error('navigator.mediaDevices.getUserMedia is not available'))
    return Promise.resolve()
  }

  return navigator.mediaDevices.getUserMedia({
    audio: true
  }).then(function (stream) {
    var recorder = new MediaRecorder(stream)
    var currentURL = null
    mediaRecorder = recorder

    // Install the handler once, before any click, rather than re-assigning it
    // on every click.
    recorder.ondataavailable = function (event) {
      // Release the blob behind the previous recording so repeated takes do
      // not accumulate in memory.
      if (currentURL !== null) {
        window.URL.revokeObjectURL(currentURL)
      }
      currentURL = window.URL.createObjectURL(event.data)
      slot.src = currentURL
    }

    record.addEventListener('click', function () {
      if (!recording) {
        recording = true
        recorder.start()
        record.textContent = '⏹️ Stop'
      } else {
        recording = false
        recorder.stop()
        record.textContent = '⏺️ Record'
      }
    })
  }).catch(function (error) {
    console.log(error)
  })
}
/* Audio elements carrying the "hidden" class are not rendered. */
audio.hidden { display: none; }

/* Every button: pointer cursor, roomier padding, larger text. */
button {
  cursor: pointer;
  padding: 0.25em 0.5em;
  font-size: 1.35em;
}
<h1>Mad Libs</h1>
<p>Get Ready To Record Your Mad Libs</p>
<p>Click the record button and say a "noun," "verb," "adjective," or anything else you're prompted for »</p>
<h4 id='prompt'>Common Noun</h4>
<button id="record">⏺️ Record</button>
<audio id="mad-libs" src='audios/mad-libs.m4a' controls class='hidden'></audio>
<audio id='1' controls></audio>
<audio id='2' controls></audio>
<audio id='3' class='hidden'></audio>
<audio id='4' class='hidden'></audio>
<audio id='5' class='hidden'></audio>
<audio id='6' class='hidden'></audio>
<button id="download">Download</button>

<!--

This is the mad libs story...

Once upon a time there was a king with a great big
______ (common noun).

And every morning he took it out on the royal balcony
and ______ (verb, past tense) it for at least 3 hours.

This exhausted the king, and he was very ______
(adjective) to begin with.

The people had gotten very ______ (adjective) with the
king's morning routine.

So they gathered all their ______ (noun, plural),
stormed the castle and ______ (verb, past tense) the
king.

THE END


-->

I’d like the user to be able to record their prompts (noun, verb, adjective, etc), and then play them back in the context of the mad libs story (which is recorded ahead of time for them in an animated voice).

Plus! Importantly, I want to let them download their creation with a download button.

I’ve gotten lost in ArrayBuffers and Blobs, and I can’t make much sense of how to do this. Am I better off trying to use WASM with ffmpeg? Or the MediaRecorder API? Help would be awesome :)