Skip to content

Instantly share code, notes, and snippets.

@steffensbola
Created October 1, 2024 20:26
Show Gist options
  • Select an option

  • Save steffensbola/12a1007db15a7ff44d373393bd15cb62 to your computer and use it in GitHub Desktop.

Select an option

Save steffensbola/12a1007db15a7ff44d373393bd15cb62 to your computer and use it in GitHub Desktop.
Checks whether a domain is reachable and whether it has a robots.txt file or meta tags that tell search engines not to index it.
import axios from 'axios';
/**
 * Outcome of inspecting a single site URL.
 *
 * The three `shouldIndex*` flags are optional: they are `undefined` when the
 * corresponding check was not run (site unreachable) or could not be completed.
 */
interface Subdomain {
  /** The URL that was inspected, exactly as supplied by the caller. */
  url: string;
  /** Whether the URL answered the reachability probe. */
  isAccessible: boolean;
  /** Verdict derived from robots.txt alone. */
  shouldIndexRobots?: boolean;
  /** Verdict derived from the page's robots meta tag alone. */
  shouldIndexMeta?: boolean;
  /** Combined verdict from the robots.txt and meta-tag checks. */
  shouldIndex?: boolean;
}
/**
 * Probes a URL with a GET request (5 second timeout) and reports whether it
 * answered with a status in the 200-399 range.
 *
 * @param subdomain Absolute URL to probe.
 * @returns `true` for a 2xx/3xx answer; `false` for any other status or when
 *   the request fails for any reason (the failure itself is not reported).
 */
async function isSubdomainPubliclyAccessible(subdomain: string): Promise<boolean> {
  const requestOptions = { timeout: 5000 };
  try {
    const { status } = await axios.get(subdomain, requestOptions);
    return 200 <= status && status < 400;
  } catch {
    // Any failure (network error, timeout, rejected status) counts as unreachable.
    return false;
  }
}
/**
 * Reports whether a robots.txt body forbids every crawler from the whole site.
 *
 * Only groups that apply to the wildcard agent (`User-agent: *`) are
 * considered, and only rules covering the site root (`/` or `/*`) count.
 * An empty `Disallow:` or a rule for a sub-path does not block the site.
 * A root `Allow` in the same scope cancels a root `Disallow`.
 */
function robotsTxtBlocksAllCrawlers(robotsTxt: string): boolean {
  let groupAgents: string[] = [];
  // True while we are still reading the run of User-agent lines that opens a group.
  let readingAgents = false;
  let rootDisallowed = false;
  let rootAllowed = false;

  for (const rawLine of robotsTxt.split(/\r?\n/)) {
    // Drop trailing comments; trim() also removes a leading byte-order mark.
    const line = rawLine.replace(/#.*$/, '').trim();
    const separator = line.indexOf(':');
    if (separator === -1) continue;

    const field = line.slice(0, separator).trim().toLowerCase();
    const value = line.slice(separator + 1).trim();

    if (field === 'user-agent') {
      // A User-agent line that follows rules starts a new group.
      if (!readingAgents) groupAgents = [];
      groupAgents.push(value);
      readingAgents = true;
      continue;
    }

    // Lines such as Sitemap or Crawl-delay neither close a group nor carry a verdict.
    if (field !== 'allow' && field !== 'disallow') continue;

    readingAgents = false;
    if (!groupAgents.includes('*')) continue;

    const coversRoot = value === '/' || value === '/*';
    if (!coversRoot) continue;
    if (field === 'disallow') rootDisallowed = true;
    else rootAllowed = true;
  }

  return rootDisallowed && !rootAllowed;
}

/**
 * Fetches the site's robots.txt and decides whether it lets crawlers in.
 *
 * The file is always requested from the root of the URL's origin, so a
 * trailing slash or a path in `subdomain` does not produce a malformed URL.
 *
 * @param subdomain Absolute URL of the site to inspect.
 * @returns `true` when robots.txt does not block the whole site for all
 *   crawlers, `false` when it does, and `undefined` when the file could not be
 *   fetched or read (the verdict is unknown, not "allowed").
 */
async function checkRobotsTxt(subdomain: string): Promise<boolean | undefined> {
  try {
    const robotsUrl = new URL('/robots.txt', subdomain).toString();
    // Same timeout as the reachability probe so one slow host cannot stall a batch.
    const response = await axios.get(robotsUrl, { timeout: 5000 });
    const robotsTxt: unknown = response.data;
    if (typeof robotsTxt !== 'string') {
      throw new Error('robots.txt response body is not text');
    }
    return !robotsTxtBlocksAllCrawlers(robotsTxt);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error checking robots.txt for ${subdomain}: ${reason}`);
    return undefined; // Unknown: the caller decides how to treat a failed check.
  }
}
/**
 * Reads one attribute value from a single `<meta ...>` tag.
 * Handles double-quoted, single-quoted and unquoted values; the result is
 * trimmed and lower-cased. Returns `undefined` when the attribute is absent.
 */
function readMetaAttribute(tag: string, attribute: 'name' | 'content'): string | undefined {
  // The leading \s keeps `data-name=` and similar from matching `name=`.
  const pattern = new RegExp(
    `\\s${attribute}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`,
    'i',
  );
  const match = pattern.exec(tag);
  if (!match) return undefined;
  return (match[1] || match[2] || match[3] || '').trim().toLowerCase();
}

/**
 * Reports whether the HTML contains a robots meta tag carrying a `noindex`
 * (or the equivalent `none`) directive, regardless of attribute order,
 * quoting style, letter case, or additional directives in the same tag.
 */
function htmlHasNoIndexMeta(html: string): boolean {
  const metaTags = html.match(/<meta\b[^>]*>/gi) || [];
  return metaTags.some((tag) => {
    if (readMetaAttribute(tag, 'name') !== 'robots') return false;
    const directives = (readMetaAttribute(tag, 'content') || '')
      .split(',')
      .map((directive) => directive.trim());
    return directives.includes('noindex') || directives.includes('none');
  });
}

/**
 * Fetches the page and checks its robots meta tag for a no-index directive.
 *
 * @param subdomain Absolute URL of the page to inspect.
 * @returns `true` when no robots meta tag forbids indexing, `false` when one
 *   does, and `undefined` when the page could not be fetched or is not text
 *   (the verdict is unknown, not "allowed").
 */
async function checkMetaTags(subdomain: string): Promise<boolean | undefined> {
  try {
    // Same timeout as the reachability probe so one slow host cannot stall a batch.
    const response = await axios.get(subdomain, { timeout: 5000 });
    const html: unknown = response.data;
    if (typeof html !== 'string') {
      throw new Error('page response body is not text');
    }
    return !htmlHasNoIndexMeta(html);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error checking meta tags for ${subdomain}: ${reason}`);
    return undefined; // Unknown: the caller decides how to treat a failed check.
  }
}
/**
 * Runs the reachability, robots.txt and meta-tag checks for one URL.
 *
 * The robots.txt and meta-tag checks run only when the URL is reachable, and
 * they run concurrently because neither depends on the other.
 *
 * The combined `shouldIndex` verdict is:
 * - `false` if either check says "do not index" (even if the other is unknown),
 * - `true` only if both checks say indexing is allowed,
 * - `undefined` otherwise (unreachable, or at least one check is unknown).
 *
 * @param subdomain Absolute URL of the site to inspect.
 * @returns The per-check and combined results for that URL.
 */
async function checkSubdomain(subdomain: string): Promise<Subdomain> {
  const isAccessible = await isSubdomainPubliclyAccessible(subdomain);

  let shouldIndexRobots: boolean | undefined;
  let shouldIndexMeta: boolean | undefined;
  if (isAccessible) {
    [shouldIndexRobots, shouldIndexMeta] = await Promise.all([
      checkRobotsTxt(subdomain),
      checkMetaTags(subdomain),
    ]);
  }

  // A plain `a && b` is order-dependent when one side is undefined:
  // (undefined, false) would yield undefined although indexing is clearly blocked.
  let shouldIndex: boolean | undefined;
  if (shouldIndexRobots === false || shouldIndexMeta === false) {
    shouldIndex = false;
  } else if (shouldIndexRobots === true && shouldIndexMeta === true) {
    shouldIndex = true;
  }

  return {
    url: subdomain,
    shouldIndex,
    shouldIndexRobots,
    shouldIndexMeta,
    isAccessible,
  };
}
/**
 * Inspects every URL in the list concurrently.
 *
 * @param subdomains Absolute URLs to inspect.
 * @returns One result per input URL, in the same order as the input.
 */
async function checkSubdomains(subdomains: string[]): Promise<Subdomain[]> {
  // Start all checks first, then wait for the whole batch.
  const pendingChecks = subdomains.map(checkSubdomain);
  return Promise.all(pendingChecks);
}
// Example usage: replace the placeholder URLs with the sites to inspect.
const subdomains = [
  'https://subdomain1.yourdomain.com/',
  'https://subdomain2.yourdomain.com/',
  'https://subdomain3.yourdomain.com/',
];

checkSubdomains(subdomains)
  .then((results) => {
    console.log(results);
  })
  .catch((error) => {
    // Report unexpected failures instead of leaving an unhandled rejection.
    console.error('Subdomain check failed:', error);
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment