I’m making some code editor, and I use regular expressions to colorize some words. I want to accept all characters, but I want to divide some characters like numbers, {}, (), keywords (int, bool, …). So I tried this regular expression:
/([a-zA-Z_]w*|d+|[(){}#[]!"$%&/=?*+-;,:.<>@#\|€÷פ߸¨'˝´˙`˛°˘^ˇ~])|(s+)/g
And output with it is:
["#", "include", " ", "int", "(","bool", ")", "01", {", "while", "}", "0", "/", "/"]
So as I mentioned, I want every number divided: ["0", "1"]
,
keywords: ["int, "bool"]
,
brackets: ["(", ")", "{", "}"]
,
includes: ["#include", "#ifdef", "#ifndef", "#define"]
The first problem at includes is that RegEx maybe cannot accept full words like #include, so if anyone can show me how to get all text after # to some point, like include (prove me wrong),
if it can’t, I will use to color like this [“#”, “include”],
And last, the second problem is with a comment, I don’t want to mark one per one on the line, beacuse divison /
, and I want to detect it like this:
["//", "/*", "*/"]
And I want to color the whole line for //
, and the whole text until end of “BIG” comment like this:
/*text text text
text text text
text text text*/
Also very important, IT NEEDS TO ACCEPT SPACE. Sorry for different topics!
With my regular expression:
Expected:
My code:
var editor = document.getElementById("editor");
editor.addEventListener("input", handleInput);
function getCaretPosition() {
var caretOffset = 0;
var selection = window.getSelection();
if (selection.rangeCount > 0) {
var range = selection.getRangeAt(0).cloneRange();
range.selectNodeContents(editor);
range.setEnd(selection.focusNode, selection.focusOffset);
caretOffset = range.toString().length;
}
return caretOffset;
}
function setCaretPosition(position) {
var selection = window.getSelection();
var range = document.createRange();
var textNodes = getTextNodes(editor);
var currentNode = null;
var charCount = 0;
for (var i = 0; i < textNodes.length; i++) {
var node = textNodes[i];
var nodeLength = node.textContent.length;
if (position <= charCount + nodeLength) {
currentNode = node;
break;
}
charCount += nodeLength;
}
if (currentNode) {
var offset = position - charCount;
range.setStart(currentNode, offset);
range.collapse(true);
selection.removeAllRanges();
selection.addRange(range);
}
}
function getTextNodes(node) {
var textNodes = [];
function traverse(node) {
if (node.nodeType === Node.TEXT_NODE) {
textNodes.push(node);
} else {
var childNodes = node.childNodes;
for (var i = 0; i < childNodes.length; i++) {
traverse(childNodes[i]);
}
}
}
traverse(node);
return textNodes;
}
function handleInput() {
var userInput = editor.textContent;
var currentPosition = getCaretPosition();
var formattedContent = formatContent(userInput);
editor.innerHTML = formattedContent;
setCaretPosition(currentPosition);
}
function formatContent(input) {
var formattedHTML = "";
var regex = /([a-zA-Z_]w*|d+|[(){}#[]!"$%&/=?*+-;,:.<>@#\|€÷פ߸¨'˝´˙`˛°˘^ˇ~])|(s+)/g;
var tokens = input.match(regex);
console.log(tokens);
if (tokens) {
for (var i = 0; i < tokens.length; i++) {
var token = tokens[i];
if (keywords.indexOf(token.trim()) > -1) {
formattedHTML += "<span class='keywords'>" + token + "</span>";
} else if (brackets.indexOf(token.trim()) > -1) {
formattedHTML += "<span class='brackets'>" + token + "</span>";
} else if (comNum.indexOf(token.trim()) > -1) {
formattedHTML += "<span class='comNum'>" + token + "</span>";
} else if (defs.indexOf(token.trim()) > -1) {
formattedHTML += "<span class='defs'>" + token + "</span>";
} else {
formattedHTML += token;
}
}
}
return formattedHTML;
}