I am working on a project where I need to extract specific information from URLs, specifically the environment, domain, and hostname. The URLs have variable subdomains, and I’m having difficulty constructing a regex pattern to capture the required groups.
Here are some examples of the URLs I’m working with:
- https://example.test.qa.sub.hostname.com
- https://example.test.stage.coonect.hostname.com
- https://example.qa.hostname.com https://example.stage.hostname.com
- https://example.test.hostname.com
- https://ops-cert-stage-beta.apps.sub-test.minor.qa.test.sub.hostname.com
- https://ops-cert-qa-beta.apps.sub-test.minor.qa.test.sub.hostname.com
- https://ops-cert-qa.apps.sub-test.minor.qa.test.sub.hostname.com
- https://ops-cert-stage.apps.sub-test.minor.qa.test.sub.hostname.com
I need help crafting a regex pattern that can efficiently capture the following groups:
Group 1: Environment (e.g., test, stage, qa)
Group 2: Domain (e.g., example, ops-cert-qa-beta)
Group 3: Hostname (e.g., hostname)
const regex = /.*?(?<env>(qa|stage|dev|preprod|test)).*?.(?<host>[w]+).(?<domain>w+)$/;
function extractInfoFromURL(url) {
const match = url.match(regex);
if (match) {
const environment = match.groups.env;
const domain = match.groups.domain;
const hostname = match.groups.host;
return { environment, domain, hostname };
} else {
return null; // URL didn't match the pattern
}
}
const testUrls = [
"https://example.test.qa.sub.hostname.com",
"https://example.test.stage.coonect.hostname.com",
"https://example.qa.hostname.com",
"https://example.hostname.com",
"https://example.stage.hostname.com",
"https://example.test.hostname.com",
"https://ops-cert-stage-beta.apps.sub-test.minor.qa.test.sub.hostname.com",
"https://ops-cert-qa-beta.apps.sub-test.minor.qa.test.sub.hostname.com",
"https://ops-cert-qa.apps.sub-test.minor.qa.test.sub.hostname.com",
"https://ops-cert-stage.apps.sub-test.minor.qa.test.sub.hostname.com"
];
testUrls.forEach((url, index) => {
const result = extractInfoFromURL(url);
if (result) {
console.log(`Result for URL ${index + 1}:`, result);
} else {
console.log(`URL ${url} did not match the pattern.`);
}
});
Check for Result 3, 1&2 are working fine.