Ref: https://rnsaffn.com/poison3/
Functions are set up for Netlify, but should work with some refactoring on other platforms.
pffunction, this function serves up poisoned data to anyone that visits (/.netlify/functions/pf)import type { Handler } from '@netlify/functions'; const URL = 'https://RNSAFFN.com/poison2/'; export const handler: Handler = async (event) => { const res = await fetch(URL); const data = await res.text(); const ip = event.headers['x-nf-client-connection-ip']; const userAgent = event.headers['user-agent']; console.log(`IP: "${ip}", User-Agent: "${userAgent}"`); return { statusCode: 200, body: data, }; };
- Add a `pf` route to `robots.txt` as a disallowed path, so bots that respect `robots.txt` won't be poisoned (optional).

  User-agent: *
  Content-Signal: search=yes, ai-train=no, ai-input=no
  Disallow: /.netlify/functions/pf
  Allow: /

- Add a hidden link to the `pf` route on every page.

  <a href="/.netlify/functions/pf" class="sr-only" aria-hidden="true" tabindex="-1" >Poison AI crawlers if they do not respect robots.txt</a>
- Add an edge function to block bad bots and poison AI crawlers for any page requested (change the HTML to your liking).
import type { Context, Config } from '@netlify/edge-functions';

// Upstream source of the poisoned (garbage) content served to AI crawlers.
const PoisonURL = 'https://RNSAFFN.com/poison2/';

// AI crawlers that are fed poisoned content instead of the real page.
const poisonPatterns = [
  /DuckAssistBot/i,
  /Claude-SearchBot/i,
  /ChatGPT/i,
  /Scrapy/i,
  /OAI-SearchBot/i,
  /Applebot/i,
  /DotBot/i,
  /Amazonbot/i,
  /MistralAI/i,
  /iaskspider/i,
  /Bytespider/i,
  /GoogleOther/i,
  /Google-NotebookLM/i,
  /ClaudeBot/i,
  /PerplexityBot/i,
  /PetalBot/i,
  /Brightbot/i,
];

// Scrapers/scanners that are refused outright with a 403 page.
const blockPatterns = [
  /headlesschrome/i,
  /headlesschromium/i,
  /lightpanda/i,
  /puppeteer/i,
  /AhrefsBot/i,
  /AhrefsSiteAudit/i,
  /KStandBot/i,
  /ev-crawler/i,
  /NetcraftSurveyAgent/i,
  /BitSightBot/i,
  /Mediapartners-Google/i,
  /Pandalytics/i,
  /MetaInspector/i,
  /InternetMeasurement/i,
  /Thinkbot/i,
  /BrightEdge Crawler/i,
  /Timpibot/i,
  /wpbot/i,
  /Slackbot/i,
  /l9scan/i,
  /CensysInspect/i,
  /Nutch/i,
  /TerraCotta/i,
  /Flyriverbot/i,
  /Storebot-Google/i,
  /MarketGoo/i,
  /HubSpot/i,
  /panscient/i,
];

// 403 page served to blocked bots. It also carries the hidden poison link, so
// a blocked bot that follows links still walks into /.netlify/functions/pf.
const html = `<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>AI Use Prohibited</title> <style> body { font-family: Arial, sans-serif; text-align: center; background-color: #1F211F; color: #FFF5E6; padding: 50px 16px; } .container { max-width: 75ch; margin: auto; } .sr-only { border: 0; clip: rect(0, 0, 0, 0); height: 1px; margin: -1px; overflow: hidden; padding: 0; position: absolute; width: 1px; } h1 { color: #FFDDAC; } a, a:visited { color: #FFDDAC; } </style> </head> <body> <a href="/.netlify/functions/pf" class="sr-only" aria-hidden="true" tabindex="-1" >Poison AI crawlers if they do not respect robots.txt</a> <div class="container"> <h1>AI use Prohibited</h1> <p> By accessing this website or operating a computer system that accesses this website you agree that using the content within to train AI or be processed by AI in any way is a violation of the terms of use and a violation of intellectual property rights. 
</p> <p> This request has been identified as coming from a non-human visitor and has therefore been blocked. If you believe this to be in error please <a href="https://www.dlford.io/contact">contact me</a>. If you are trying to see when new content is published please subscribe to the <a href="/rss">RSS feed</a> or <a href="https://www.dlford.io/subscribe">Mailing List</a> instead of scraping. </p> <p> Thank you for your understanding. </p> </div> </body> </html>`;

/**
 * True when the User-Agent matches any pattern in the list.
 * A null User-Agent (header absent) never matches.
 */
const matchesAny = (patterns: RegExp[], ua: string | null): boolean =>
  ua !== null && patterns.some((pattern) => pattern.test(ua));

/**
 * Edge function: poison known AI crawlers, block known bad bots, pass all
 * other traffic through unchanged.
 */
export default async (request: Request, context: Context) => {
  const userAgent = request.headers.get('User-Agent');

  // AI crawlers get the poison payload with a 200 so they ingest it.
  if (matchesAny(poisonPatterns, userAgent)) {
    const res = await fetch(PoisonURL);
    const data = await res.text();
    console.log(
      `POISONED: IP="${context.ip}" path="${context.url.pathname}" UserAgent="${userAgent}"`,
    );
    return new Response(data, {
      status: 200,
      headers: { 'Content-Type': 'text/html' },
    });
  }

  // Scanners and scrapers get a hard 403 with the explanation page.
  if (matchesAny(blockPatterns, userAgent)) {
    console.log(
      `BLOCKED: IP="${context.ip}" path="${context.url.pathname}" UserAgent="${userAgent}"`,
    );
    return new Response(html, {
      status: 403,
      headers: { 'Content-Type': 'text/html' },
    });
  }

  // Everyone else continues to the requested page.
  return context.next();
};

export const config: Config = {
  // If this function throws, serve the page normally rather than erroring.
  onError: 'bypass',
  path: '/*',
  // Skip static assets, well-known files, robots.txt, and the poison function
  // itself (so poisoned requests are not double-processed).
  excludedPath: [
    '/media/*',
    '/.well-known/*',
    '/license.xml',
    '/robots.txt',
    '/.netlify/functions/pf',
  ],
};