mekarpeles · January 5, 2026 14:01
diff --git a/grx.js b/grx.js
 /**
 * Extracts key information from a single Goodreads BookListItem HTML element.
 *
 * Only the title link is treated as essential. Every other field keeps its
 * initial value (null, or [] for authors) when the matching node is not found.
 *
 * @param {HTMLElement} bookArticleElement The <article class="BookListItem"> element.
 * @returns {object|null} An object with rank, title, url, authors, averageRating,
 *   ratingsCount, shelvingsCount and coverUrl, or null if the argument is not an
 *   <article> element or the title link is missing.
 */
 function extractBookData(bookArticleElement) {
    // Reject null/undefined and anything without a string tagName up front;
    // calling .toLowerCase() on a missing tagName would throw instead of returning null.
    const isArticle = Boolean(bookArticleElement)
        && typeof bookArticleElement.tagName === 'string'
        && bookArticleElement.tagName.toLowerCase() === 'article';
    if (!isArticle) {
        console.error("Invalid element provided. Must be an <article> element.");
        return null;
    }

    // All fields start out "missing"; counts stay abbreviated strings (e.g. "44.6k").
    const bookData = {
        rank: null,
        title: null,
        url: null,
        authors: [],
        averageRating: null,
        ratingsCount: null,
        shelvingsCount: null,
        coverUrl: null
    };

    // --- 1. Rank: text such as "#200", stored as a string without the first '#' ---
    const rankElement = bookArticleElement.querySelector('.BookListItemRank h2');
    if (rankElement) {
        bookData.rank = rankElement.textContent.trim().replace('#', '');
    }

    // --- 2. Title and URL (essential: without them the item is skipped) ---
    const titleLink = bookArticleElement.querySelector('a[data-testid="bookTitle"]');
    if (!titleLink) {
        console.warn("Could not find book title/URL for an item.");
        return null;
    }
    bookData.title = titleLink.textContent.trim();
    bookData.url = titleLink.href;

    // --- 3. Author(s): one entry per '.ContributorLink__name' node ---
    bookArticleElement.querySelectorAll('.ContributorLink__name').forEach((nameEl) => {
        bookData.authors.push(nameEl.textContent.trim());
    });

    // --- 4. Average rating (e.g., 3.84) ---
    const averageRatingValue = bookArticleElement.querySelector('[data-testid="ratingValue"] .Text__semibold');
    if (averageRatingValue) {
        const parsedRating = parseFloat(averageRatingValue.textContent.trim());
        // Keep null rather than NaN when the text is not numeric.
        if (!Number.isNaN(parsedRating)) {
            bookData.averageRating = parsedRating;
        }
    }

    // --- 5. Ratings count (e.g., "44.6k ratings" -> "44.6k") ---
    const ratingsCountElement = bookArticleElement.querySelector('[data-testid="ratingsCount"] .Text__subdued');
    if (ratingsCountElement) {
        // The pattern also strips the singular form ("1 rating" -> "1").
        bookData.ratingsCount = ratingsCountElement.textContent.trim().replace(/\s+ratings?\b/i, '');
    }

    // --- 6. Shelvings count (e.g., "156k shelvings" -> "156k") ---
    // Matched against the whole rating container text. The k/m suffix is optional
    // so that plain counts below one thousand ("532 shelvings") are captured too.
    const shelvingsContainer = bookArticleElement.querySelector('.BookListItemRating');
    if (shelvingsContainer) {
        const match = shelvingsContainer.textContent.match(
            /((?:\d+(?:[.,]\d+)*|\.\d+)[km]?)\s*shelvings?\b/i
        );
        if (match && match[1]) {
            bookData.shelvingsCount = match[1].trim();
        }
    }

    // --- 7. Cover URL: first srcset candidate, falling back to src ---
    const coverImage = bookArticleElement.querySelector('.BookCover__image img.ResponsiveImage');
    if (coverImage) {
        const srcset = coverImage.getAttribute('srcset');
        // Trim before splitting: a srcset that starts with whitespace would
        // otherwise produce an empty string as the "URL".
        const firstCandidate = srcset ? srcset.trim().split(',')[0].trim().split(/\s+/)[0] : '';
        bookData.coverUrl = firstCandidate || coverImage.src;
    }

    return bookData;
 }

 /**
 * Collects the extracted record of every book entry found in the current document.
 *
 * @returns {object[]} One entry per `article.BookListItem` element for which
 *   extractBookData() returned a truthy value, in the order the elements were matched.
 */
 function extractAllBookData() {
    const collected = [];

    for (const article of document.querySelectorAll('article.BookListItem')) {
        const record = extractBookData(article);
        // Items that could not be parsed come back as null and are left out.
        if (!record) {
            continue;
        }
        collected.push(record);
    }

    return collected;
 }

 /**
 * Executes the book data extraction and automatically triggers a JSON file download.
 *
 * When extractAllBookData() yields no items, a warning is logged and nothing is downloaded.
 *
 * @param {string} filename The name for the downloaded file (e.g., 'goodreads_data.json').
 */
 function downloadBookDataAsJson(filename = 'goodreads_extracted_data.json') {
    // 1. Extract the data; bail out early when there is nothing to save
    const dataArray = extractAllBookData();
    if (dataArray.length === 0) {
        console.warn("No book data was extracted. Download cancelled.");
        return;
    }

    // 2. Serialize as pretty-printed JSON and wrap it in a Blob
    const jsonString = JSON.stringify(dataArray, null, 2);
    const blob = new Blob([jsonString], { type: 'application/json' });

    // 3. Point a temporary <a download> element at an object URL for the Blob
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = filename;

    try {
        // 4. Attach the link (necessary for some browsers) and simulate a click
        document.body.appendChild(a);
        a.click();
    } finally {
        // 5. Always clean up, even if attaching or clicking threw
        if (a.parentNode) {
            a.parentNode.removeChild(a);
        }
        // NOTE(review): revoking in the same tick as click() is reported to cancel
        // the download in some browsers, so the revoke is deferred — confirm the
        // delay is adequate for the browsers this script targets.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    }

    console.log(`✅ Success! Download of '${filename}' should have started.`);
    console.log(`Total books extracted: ${dataArray.length}`);
 }

 // EXECUTION: run the extraction once and trigger the JSON download.
 // downloadBookDataAsJson() calls extractAllBookData() itself, so a separate
 // extractAllBookData() call whose result is discarded would only repeat the
 // DOM walk and duplicate any warnings it logs.
 downloadBookDataAsJson();
	/**
	 * Extracts key information from a single Goodreads BookListItem HTML element.
	 *
	 * Only the title link is treated as essential. Every other field keeps its
	 * initial value (null, or [] for authors) when the matching node is not found.
	 *
	 * @param {HTMLElement} bookArticleElement The <article class="BookListItem"> element.
	 * @returns {object|null} An object with rank, title, url, authors, averageRating,
	 *   ratingsCount, shelvingsCount and coverUrl, or null if the argument is not an
	 *   <article> element or the title link is missing.
	 */
	function extractBookData(bookArticleElement) {
		// Reject null/undefined and anything without a string tagName up front;
		// calling .toLowerCase() on a missing tagName would throw instead of returning null.
		const isArticle = Boolean(bookArticleElement)
			&& typeof bookArticleElement.tagName === 'string'
			&& bookArticleElement.tagName.toLowerCase() === 'article';
		if (!isArticle) {
			console.error("Invalid element provided. Must be an <article> element.");
			return null;
		}

		// All fields start out "missing"; counts stay abbreviated strings (e.g. "44.6k").
		const bookData = {
			rank: null,
			title: null,
			url: null,
			authors: [],
			averageRating: null,
			ratingsCount: null,
			shelvingsCount: null,
			coverUrl: null
		};

		// --- 1. Rank: text such as "#200", stored as a string without the first '#' ---
		const rankElement = bookArticleElement.querySelector('.BookListItemRank h2');
		if (rankElement) {
			bookData.rank = rankElement.textContent.trim().replace('#', '');
		}

		// --- 2. Title and URL (essential: without them the item is skipped) ---
		const titleLink = bookArticleElement.querySelector('a[data-testid="bookTitle"]');
		if (!titleLink) {
			console.warn("Could not find book title/URL for an item.");
			return null;
		}
		bookData.title = titleLink.textContent.trim();
		bookData.url = titleLink.href;

		// --- 3. Author(s): one entry per '.ContributorLink__name' node ---
		bookArticleElement.querySelectorAll('.ContributorLink__name').forEach((nameEl) => {
			bookData.authors.push(nameEl.textContent.trim());
		});

		// --- 4. Average rating (e.g., 3.84) ---
		const averageRatingValue = bookArticleElement.querySelector('[data-testid="ratingValue"] .Text__semibold');
		if (averageRatingValue) {
			const parsedRating = parseFloat(averageRatingValue.textContent.trim());
			// Keep null rather than NaN when the text is not numeric.
			if (!Number.isNaN(parsedRating)) {
				bookData.averageRating = parsedRating;
			}
		}

		// --- 5. Ratings count (e.g., "44.6k ratings" -> "44.6k") ---
		const ratingsCountElement = bookArticleElement.querySelector('[data-testid="ratingsCount"] .Text__subdued');
		if (ratingsCountElement) {
			// The pattern also strips the singular form ("1 rating" -> "1").
			bookData.ratingsCount = ratingsCountElement.textContent.trim().replace(/\s+ratings?\b/i, '');
		}

		// --- 6. Shelvings count (e.g., "156k shelvings" -> "156k") ---
		// Matched against the whole rating container text. The k/m suffix is optional
		// so that plain counts below one thousand ("532 shelvings") are captured too.
		const shelvingsContainer = bookArticleElement.querySelector('.BookListItemRating');
		if (shelvingsContainer) {
			const match = shelvingsContainer.textContent.match(
				/((?:\d+(?:[.,]\d+)*|\.\d+)[km]?)\s*shelvings?\b/i
			);
			if (match && match[1]) {
				bookData.shelvingsCount = match[1].trim();
			}
		}

		// --- 7. Cover URL: first srcset candidate, falling back to src ---
		const coverImage = bookArticleElement.querySelector('.BookCover__image img.ResponsiveImage');
		if (coverImage) {
			const srcset = coverImage.getAttribute('srcset');
			// Trim before splitting: a srcset that starts with whitespace would
			// otherwise produce an empty string as the "URL".
			const firstCandidate = srcset ? srcset.trim().split(',')[0].trim().split(/\s+/)[0] : '';
			bookData.coverUrl = firstCandidate || coverImage.src;
		}

		return bookData;
	}

	/**
	 * Gathers the parsed data of all book list items present in the document.
	 *
	 * @returns {object[]} The truthy results of extractBookData() for each
	 *   `article.BookListItem` element, in the order the elements were matched.
	 */
	function extractAllBookData() {
		const articles = Array.from(document.querySelectorAll('article.BookListItem'));

		// Parse every item first, then drop the ones that yielded nothing (null).
		return articles
			.map((article) => extractBookData(article))
			.filter((parsed) => Boolean(parsed));
	}

	/**
	 * Executes the book data extraction and automatically triggers a JSON file download.
	 *
	 * When extractAllBookData() yields no items, a warning is logged and nothing is downloaded.
	 *
	 * @param {string} filename The name for the downloaded file (e.g., 'goodreads_data.json').
	 */
	function downloadBookDataAsJson(filename = 'goodreads_extracted_data.json') {
		// 1. Extract the data; bail out early when there is nothing to save
		const dataArray = extractAllBookData();
		if (dataArray.length === 0) {
			console.warn("No book data was extracted. Download cancelled.");
			return;
		}

		// 2. Serialize as pretty-printed JSON and wrap it in a Blob
		const jsonString = JSON.stringify(dataArray, null, 2);
		const blob = new Blob([jsonString], { type: 'application/json' });

		// 3. Point a temporary <a download> element at an object URL for the Blob
		const url = URL.createObjectURL(blob);
		const a = document.createElement('a');
		a.href = url;
		a.download = filename;

		try {
			// 4. Attach the link (necessary for some browsers) and simulate a click
			document.body.appendChild(a);
			a.click();
		} finally {
			// 5. Always clean up, even if attaching or clicking threw
			if (a.parentNode) {
				a.parentNode.removeChild(a);
			}
			// NOTE(review): revoking in the same tick as click() is reported to cancel
			// the download in some browsers, so the revoke is deferred — confirm the
			// delay is adequate for the browsers this script targets.
			setTimeout(() => URL.revokeObjectURL(url), 1000);
		}

		console.log(`✅ Success! Download of '${filename}' should have started.`);
		console.log(`Total books extracted: ${dataArray.length}`);
	}

	// EXECUTION: run the extraction once and trigger the JSON download.
	// downloadBookDataAsJson() calls extractAllBookData() itself, so a separate
	// extractAllBookData() call whose result is discarded would only repeat the
	// DOM walk and duplicate any warnings it logs.
	downloadBookDataAsJson();
No results found