Ref: https://rnsaffn.com/poison3/
Functions are set up for Netlify, but should work with some refactoring on other platforms.
pffunction, this function serves up poisoned data to anyone that visits (/.netlify/functions/pf)import type { Handler } from '@netlify/functions'; const URL = 'https://RNSAFFN.com/poison2/'; export const handler: Handler = async (event) => { const res = await fetch(URL); const data = await res.text(); const ip = event.headers['x-nf-client-connection-ip']; const userAgent = event.headers['user-agent']; console.log(`IP: "${ip}", User-Agent: "${userAgent}"`); return { statusCode: 200, body: data, }; };
- Add a `pf` route to `robots.txt` as a disallowed path, so bots that respect `robots.txt` won't be poisoned (optional).

  User-agent: *
  Content-Signal: search=yes, ai-train=no, ai-input=no
  Disallow: /.netlify/functions/pf
  Allow: /

- Add a hidden link to the `pf` route on every page.

  <a href="/.netlify/functions/pf" class="sr-only" aria-hidden="true" tabindex="-1" >Poison AI crawlers if they do not respect robots.txt</a>
- Add an edge function to block bad bots and poison AI crawlers for any page requested (change the HTML to your liking).
import type { Context, Config } from '@netlify/edge-functions';

// Upstream source of the poisoned (garbage) content served to AI crawlers.
const PoisonURL = 'https://RNSAFFN.com/poison2/';

// AI crawlers that are fed poisoned content instead of the real page.
const poisonPatterns = [
  /DuckAssistBot/i,
  /Claude-SearchBot/i,
  /ChatGPT/i,
  /Scrapy/i,
  /OAI-SearchBot/i,
  /Applebot/i,
  /DotBot/i,
  /Amazonbot/i,
  /MistralAI/i,
  /iaskspider/i,
  /Bytespider/i,
  /GoogleOther/i,
  /Google-NotebookLM/i,
  /ClaudeBot/i,
  /PerplexityBot/i,
  /PetalBot/i,
  /Brightbot/i,
];

// Scrapers/scanners that are refused outright with a 403 page.
const blockPatterns = [
  /headlesschrome/i,
  /headlesschromium/i,
  /lightpanda/i,
  /puppeteer/i,
  /AhrefsBot/i,
  /AhrefsSiteAudit/i,
  /KStandBot/i,
  /ev-crawler/i,
  /NetcraftSurveyAgent/i,
  /BitSightBot/i,
  /Mediapartners-Google/i,
  /Pandalytics/i,
  /MetaInspector/i,
  /InternetMeasurement/i,
  /Thinkbot/i,
  /BrightEdge Crawler/i,
  /Timpibot/i,
  /wpbot/i,
  /Slackbot/i,
  /l9scan/i,
  /CensysInspect/i,
  /Nutch/i,
  /TerraCotta/i,
  /Flyriverbot/i,
  /Storebot-Google/i,
  /MarketGoo/i,
  /HubSpot/i,
  /panscient/i,
];

// 403 page served to blocked bots. It also carries the hidden poison link, so
// a blocked bot that follows links still walks into /.netlify/functions/pf.
const html = `<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>AI Use Prohibited</title> <style> body { font-family: Arial, sans-serif; text-align: center; background-color: #1F211F; color: #FFF5E6; padding: 50px 16px; } .container { max-width: 75ch; margin: auto; } .sr-only { border: 0; clip: rect(0, 0, 0, 0); height: 1px; margin: -1px; overflow: hidden; padding: 0; position: absolute; width: 1px; } h1 { color: #FFDDAC; } a, a:visited { color: #FFDDAC; } </style> </head> <body> <a href="/.netlify/functions/pf" class="sr-only" aria-hidden="true" tabindex="-1" >Poison AI crawlers if they do not respect robots.txt</a> <div class="container"> <h1>AI use Prohibited</h1> <p> By accessing this website or operating a computer system that accesses this website you agree that using the content within to train AI or be processed by AI in any way is a violation of the terms of use and a violation of intellectual property rights. 
</p> <p> This request has been identified as coming from a non-human visitor and has therefore been blocked. If you believe this to be in error please <a href="https://www.dlford.io/contact">contact me</a>. If you are trying to see when new content is published please subscribe to the <a href="/rss">RSS feed</a> or <a href="https://www.dlford.io/subscribe">Mailing List</a> instead of scraping. </p> <p> Thank you for your understanding. </p> </div> </body> </html>`;

/**
 * True when the User-Agent matches any pattern in the list.
 * A null User-Agent (header absent) never matches.
 */
const matchesAny = (patterns: RegExp[], ua: string | null): boolean =>
  ua !== null && patterns.some((pattern) => pattern.test(ua));

/**
 * Edge function: poison known AI crawlers, block known bad bots, pass all
 * other traffic through unchanged.
 */
export default async (request: Request, context: Context) => {
  const userAgent = request.headers.get('User-Agent');

  // AI crawlers get the poison payload with a 200 so they ingest it.
  if (matchesAny(poisonPatterns, userAgent)) {
    const res = await fetch(PoisonURL);
    const data = await res.text();
    console.log(
      `POISONED: IP="${context.ip}" path="${context.url.pathname}" UserAgent="${userAgent}"`,
    );
    return new Response(data, {
      status: 200,
      headers: { 'Content-Type': 'text/html' },
    });
  }

  // Scanners and scrapers get a hard 403 with the explanation page.
  if (matchesAny(blockPatterns, userAgent)) {
    console.log(
      `BLOCKED: IP="${context.ip}" path="${context.url.pathname}" UserAgent="${userAgent}"`,
    );
    return new Response(html, {
      status: 403,
      headers: { 'Content-Type': 'text/html' },
    });
  }

  // Everyone else continues to the requested page.
  return context.next();
};

export const config: Config = {
  // If this function throws, serve the page normally rather than erroring.
  onError: 'bypass',
  path: '/*',
  // Skip static assets, well-known files, robots.txt, and the poison function
  // itself (so poisoned requests are not double-processed).
  excludedPath: [
    '/media/*',
    '/.well-known/*',
    '/license.xml',
    '/robots.txt',
    '/.netlify/functions/pf',
  ],
};