Skip to content

Instantly share code, notes, and snippets.

@piotrkulpinski
Last active August 14, 2024 14:01
Show Gist options
  • Select an option

  • Save piotrkulpinski/6282c45bb6ba6c5c5c74d194033b10cb to your computer and use it in GitHub Desktop.

Select an option

Save piotrkulpinski/6282c45bb6ba6c5c5c74d194033b10cb to your computer and use it in GitHub Desktop.
Generate product descriptions and relevant links using the Anthropic API
import { createAnthropic } from "@ai-sdk/anthropic"
import Firecrawl from "@mendable/firecrawl-js"
import type { Tool } from "@prisma/client"
import { json } from "@remix-run/node"
import { generateObject } from "ai"
import { z } from "zod"
import { prisma } from "~/services.server/prisma"
// Anthropic provider created with no explicit options.
// NOTE(review): presumably picks up credentials from the environment — confirm against the SDK docs.
const anthropic = createAnthropic()
// Model handle passed to `generateObject` for every content generation request in this file.
const model = anthropic("claude-3-5-sonnet-20240620")
// Firecrawl client used to scrape tool websites; the API key is read from the FIRECRAWL_API_KEY env variable.
const firecrawl = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY })
/**
 * System prompt shared by every generation request.
 * Assembled with explicit newlines so the text sent to the model does not
 * depend on how this source file is indented.
 */
const SYSTEM_PROMPT = [
  "",
  "You are an expert content creator specializing in open source products.",
  "Your task is to generate high-quality, engaging content to display on a directory website.",
  'You do not use any catchphrases like "Empower", "Streamline" etc.',
  "",
].join("\n")

/** Structured output requested from the model for each tool (built once, reused for every request). */
const toolContentSchema = z.object({
  tagline: z
    .string()
    .describe(
      "A tagline (up to 60 characters) that captures the essence of the tool. Should not include the tool name.",
    ),
  description: z
    .string()
    .describe(
      "A concise description (up to 160 characters) that highlights the main features and benefits. Should not include the tool name.",
    ),
  content: z
    .string()
    .describe(
      "A detailed and engaging longer description with key benefits (up to 1000 characters). Can be Markdown formatted, but should start with paragraph. Make sure the lists use correct Markdown syntax.",
    ),
  links: z
    .array(z.object({ name: z.string(), url: z.string().url() }))
    .describe(
      "A list of relevant links to pricing information, documentation, social profiles and other resources. Make sure to include the name of the link and the URL. Social profiles should be last. Skip landing page and Github repository links.",
    ),
})

/**
 * Scrapes a single tool's website, asks the model for structured content and
 * stores the result on the tool record.
 *
 * @param tool - The tool whose `website` is scraped and whose record is updated.
 * @returns The updated tool record, or `null` when the scrape fails, the scrape
 *   yields no content, or any step throws (errors are logged, never rethrown,
 *   so one failing tool does not abort its batch).
 */
const generateToolContent = async (tool: Tool) => {
  try {
    const scrapedData = await firecrawl.scrapeUrl(tool.website)

    if (!scrapedData.success) {
      console.warn(`Scraping failed for ${tool.name}, skipping.`)
      return null
    }

    // Without this guard the literal string "undefined" would be interpolated
    // into the prompt and the model's made-up answer persisted to the database.
    const pageContent = scrapedData.data?.content
    if (!pageContent) {
      console.warn(`No content scraped for ${tool.name}, skipping.`)
      return null
    }

    const { object } = await generateObject({
      model,
      system: SYSTEM_PROMPT,
      prompt: `\nProvide me details for the following data: ${pageContent}\n`,
      schema: toolContentSchema,
    })

    console.log(`${tool.id} scraped and content generated successfully.`)

    return await prisma.tool.update({
      where: { id: tool.id },
      data: {
        tagline: object.tagline,
        description: object.description,
        content: object.content,
        links: object.links,
      },
    })
  } catch (e) {
    console.error(`Tool content generation failed for ${tool.name}: `, e)
    return null
  }
}

/**
 * Generates content for the given tools in fixed-size batches.
 *
 * Tools inside a batch are processed concurrently; batches run one after
 * another with a pause in between to stay under upstream rate limits.
 *
 * @param tools - Tools to process, in the order they should be batched.
 * @param batchSize - Number of tools per batch. Must be a positive integer.
 * @param delayMs - Pause between consecutive batches in milliseconds
 *   (defaults to 60 seconds). No pause follows the last batch.
 * @returns One array per batch, each holding the updated tool record or
 *   `null` (for tools that could not be processed) in input order.
 * @throws RangeError when `batchSize` is not a positive integer — a zero or
 *   negative size would otherwise make the chunking loop run forever.
 */
const processToolsInBatches = async (tools: Tool[], batchSize: number, delayMs = 60_000) => {
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(`batchSize must be a positive integer, received ${batchSize}`)
  }

  const batches: Tool[][] = []
  for (let start = 0; start < tools.length; start += batchSize) {
    batches.push(tools.slice(start, start + batchSize))
  }

  const responses: Awaited<ReturnType<typeof generateToolContent>>[][] = []

  for (const [index, batch] of batches.entries()) {
    console.log(`Processing ${index + 1} of ${batches.length} batches...`)

    responses.push(await Promise.all(batch.map(generateToolContent)))

    const isLastBatch = index === batches.length - 1
    if (!isLastBatch && delayMs > 0) {
      console.log(`Waiting for ${delayMs / 1000} seconds before processing the next batch...`)
      await new Promise(resolve => setTimeout(resolve, delayMs))
    }
  }

  return responses
}
/**
 * Remix action: finds every published tool that has no tagline yet (oldest
 * first), runs content generation over them in batches of three and responds
 * with the per-batch results as JSON.
 */
export async function action() {
  const toolsPerBatch = 3

  const pendingTools = await prisma.tool.findMany({
    orderBy: { createdAt: "asc" },
    where: { tagline: null, publishedAt: { not: null } },
  })

  console.log(`Found ${pendingTools.length} tools to process. Starting...`)

  return json(await processToolsInBatches(pendingTools, toolsPerBatch))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment