JavaScript Regular Expressions for special characters – Blancer.com Tutorials and projects

I’m making a code editor, and I use regular expressions to colorize certain words. I want to accept all characters, but I want to split out certain tokens such as numbers, {}, (), and keywords (int, bool, …). So I tried this regular expression:

/([a-zA-Z_]\w*|\d+|[(){}#\[\]!"$%&\/=?*+-;,:.<>@#\\|€÷×¤ß¸¨'˝´˙`˛°˘^ˇ~])|(\s+)/g

And output with it is:

["#", "include", " ", "int", "(", "bool", ")", "01", "{", "while", "}", "0", "/", "/"]

So as I mentioned, I want every number divided: ["0", "1"],

keywords: ["int", "bool"],

brackets: ["(", ")", "{", "}"],

includes: ["#include", "#ifdef", "#ifndef", "#define"]

The first problem, with includes, is that a RegEx perhaps cannot match a full word like #include as one token. If anyone can show me how to capture all the text after # up to some point, such as the end of "include", please prove me wrong;
if it can’t be done, I will color the two pieces separately, like this: ["#", "include"].

And lastly, the second problem is with comments: I don’t want to match the slashes one by one on the line, because a single / is the division operator. Instead I want to detect the comment markers like this:

["//", "/*", "*/"]

And I want to color the whole line for //, and the whole text until end of “BIG” comment like this:

/*text text text
  text text text
  text text text*/

Also very important, IT NEEDS TO ACCEPT SPACE. Sorry for different topics!

With my regular expression:

With my RegEx – I need 10 reputation to post images.
Expected:

My code:

// Look up the element whose id is "editor" and keep it in a file-level variable;
// getCaretPosition, setCaretPosition and handleInput all read this variable.
var editor = document.getElementById("editor");
// Re-run handleInput every time the element fires an "input" event.
editor.addEventListener("input", handleInput);

/**
 * Measures where the caret currently sits inside the editor.
 *
 * The position is the length of the text between the start of `editor`'s
 * contents and the selection's focus point, as reported by Range#toString().
 *
 * @returns {number} Character offset of the selection focus, or 0 when the
 *     window selection holds no ranges.
 */
function getCaretPosition() {
    var sel = window.getSelection();
    if (!(sel.rangeCount > 0)) {
        return 0;
    }

    // Work on a clone so the live selection range is never modified.
    var probe = sel.getRangeAt(0).cloneRange();
    probe.selectNodeContents(editor);
    // Cut the range off at the focus point; what remains is the text before the caret.
    probe.setEnd(sel.focusNode, sel.focusOffset);
    return probe.toString().length;
}

/**
 * Places a collapsed caret inside the editor at a character offset counted
 * across all of the editor's text nodes (in the order getTextNodes returns them).
 *
 * An offset that falls exactly on the boundary between two text nodes is
 * placed at the end of the earlier node. An offset larger than the total text
 * length is clamped to the end of the last text node, and a negative offset
 * is clamped to the start of the first, so the caret is always restored
 * instead of being silently left wherever the browser put it.
 *
 * @param {number} position - Character offset from the start of the editor's text.
 * @returns {void} Nothing; when the editor has no text nodes the selection is left untouched.
 */
function setCaretPosition(position) {
    var textNodes = getTextNodes(editor);
    if (textNodes.length === 0) {
        return;
    }

    // Default target: the very end of the text, used when `position` overshoots.
    var targetNode = textNodes[textNodes.length - 1];
    var targetOffset = targetNode.textContent.length;

    var consumed = 0;
    for (var i = 0; i < textNodes.length; i++) {
        var length = textNodes[i].textContent.length;
        if (position <= consumed + length) {
            targetNode = textNodes[i];
            // Range#setStart rejects negative offsets, so floor at 0.
            targetOffset = Math.max(0, position - consumed);
            break;
        }
        consumed += length;
    }

    var range = document.createRange();
    range.setStart(targetNode, targetOffset);
    range.collapse(true);

    var selection = window.getSelection();
    selection.removeAllRanges();
    selection.addRange(range);
}

/**
 * Collects every text node found at or beneath `node`.
 *
 * Nodes are returned in depth-first, first-child-first order. A text node is
 * never descended into; any other node contributes only through its
 * `childNodes`.
 *
 * @param {Node} node - Root of the subtree to search (may itself be a text node).
 * @returns {Node[]} The text nodes found, in traversal order.
 */
function getTextNodes(node) {
    var found = [];
    var pending = [node];

    while (pending.length > 0) {
        var current = pending.pop();
        if (current.nodeType === Node.TEXT_NODE) {
            found.push(current);
            continue;
        }
        // Push children last-to-first so the first child is popped (visited) first.
        var children = current.childNodes;
        for (var k = children.length - 1; k >= 0; k--) {
            pending.push(children[k]);
        }
    }

    return found;
}

/**
 * Handler for the editor's "input" event: re-renders the editor's text as
 * highlighted HTML and then puts the caret back where it was.
 *
 * The caret offset is captured before `innerHTML` is replaced, because
 * replacing the markup discards the nodes the current selection points at.
 */
function handleInput() {
    var rawText = editor.textContent;
    var savedCaret = getCaretPosition();

    editor.innerHTML = formatContent(rawText);
    setCaretPosition(savedCaret);
}

/**
 * Splits editor text into tokens and returns HTML in which recognised tokens
 * are wrapped in a <span> carrying their highlight class.
 *
 * A token is looked up, in this order, in the `keywords`, `brackets`,
 * `comNum` and `defs` lists; the first list that contains it decides the
 * class name (which equals the list name). Tokens found in no list are
 * emitted as plain text.
 *
 * Every character of `input` is preserved in the output, and `&`, `<` and `>`
 * are HTML-escaped, so assigning the result to `innerHTML` yields an element
 * whose text content equals `input`.
 *
 * NOTE(review): `keywords`, `brackets`, `comNum` and `defs` are not defined in
 * this function; they are assumed to be arrays of strings declared elsewhere
 * in the file — confirm.
 * NOTE(review): digits are still tokenised as runs ("01"), "#include" as "#"
 * plus "include", and "//" as two "/" tokens. Changing that depends on what
 * the lists above contain, so it is deliberately left as is.
 *
 * @param {string} input - Plain text to highlight.
 * @returns {string} HTML markup for the highlighted text ("" for empty input).
 */
function formatContent(input) {
    // Alternatives are tried left to right at each position:
    //   identifier | run of digits | run of whitespace | any other single character.
    // The trailing \S is a catch-all: without it, String#match would silently
    // drop every character that no alternative covers.
    var tokenPattern = /[a-zA-Z_]\w*|\d+|\s+|\S/g;

    // Escapes the characters that would otherwise be parsed as markup when the
    // result is used as element content.
    function escapeHtml(text) {
        return text
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;");
    }

    // Returns the highlight class for a token, or null when it is in no list.
    function classify(word) {
        if (keywords.indexOf(word) > -1) {
            return "keywords";
        }
        if (brackets.indexOf(word) > -1) {
            return "brackets";
        }
        if (comNum.indexOf(word) > -1) {
            return "comNum";
        }
        if (defs.indexOf(word) > -1) {
            return "defs";
        }
        return null;
    }

    var tokens = input.match(tokenPattern);
    if (!tokens) {
        // match() yields null when nothing matched, i.e. for an empty string.
        return "";
    }

    var formattedHTML = "";
    for (var i = 0; i < tokens.length; i++) {
        var token = tokens[i];
        // Classify on the raw token; only the emitted text is escaped.
        var cssClass = classify(token.trim());
        var safeText = escapeHtml(token);
        if (cssClass) {
            formattedHTML += "<span class='" + cssClass + "'>" + safeText + "</span>";
        } else {
            formattedHTML += safeText;
        }
    }
    return formattedHTML;
}