Created
June 30, 2025 10:12
-
-
Save hashcott/c0d1d983ba2f173a7899b51073c794c6 to your computer and use it in GitHub Desktop.
Crawl meta ad library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Static settings for talking to the Facebook web front end.
 *
 * - `urls.base` is used as the HTTP client's `baseURL` by `MetaAds`.
 * - `defaultRequestHeaders` are passed to the HTTP client as its default
 *   headers; they imitate a desktop Chrome top-level navigation.
 * - `defaultRequestParams` lists form fields for the Ad Library GraphQL
 *   endpoint. NOTE(review): the visible `MetaAds` code hard-codes the same
 *   values instead of reading them from here — confirm whether anything
 *   else still consumes this section.
 */
const config = {
  urls: {
    base: 'https://www.facebook.com',
  },
  defaultRequestHeaders: {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
    accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'accept-language': 'en-US,en;q=0.9',
    'content-type': 'text/html; charset="utf-8"',
    'cache-control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
  },
  defaultRequestParams: {
    __user: '0',
    __a: '1',
    fb_api_caller_class: 'RelayModern',
    fb_api_req_friendly_name: 'AdLibraryMobileFocusedStateProviderRefetchQuery',
    server_timestamps: 'true',
    __jssesw: '1',
  },
};

export default config;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import qs from 'qs'; | |
| import { type AxiosResponse, type AxiosRequestConfig } from 'axios'; | |
| import { Proxy922s5, ProxyPlainText } from './proxy'; | |
| import config from '../config'; | |
| import { Logger, Request, Utils } from '../utils'; | |
| import { FileSaver } from './saver'; | |
/**
 * The two fields this crawler reads from a single collated ad result:
 * the advertiser page's id and its display name.
 */
type Ads = {
  page_id: number;
  page_name: string;
};
/**
 * Subset of the `search_results_connection` payload that the crawler reads
 * from the Ad Library GraphQL response.
 */
type SearchResults = {
  /** Result count reported by the server; the crawler logs it as the total found. */
  count?: number;
  /** One entry per result group; only the first collated result of each is used. */
  edges?: {
    node: {
      collated_results: Ads[];
    };
  }[];
  /** Pagination state: whether another page exists and the cursor to request it. */
  page_info: {
    has_next_page: boolean;
    end_cursor: string;
  };
};
/** Publisher platform identifiers accepted by `GetAdsRequest.publisherPlatforms`. */
type Platform = 'FACEBOOK' | 'INSTAGRAM' | 'AUDIENCE_NETWORK' | 'MESSENGER' | 'WHATSAPP' | 'OCULUS' | 'THREADS';
// https://www.facebook.com/ads/library/api/?source=nav-header
/**
 * Search filters accepted by `MetaAds.getAds`.
 *
 * Every field except `query` is optional; the enum-like filters fall back to
 * `'ALL'` (or `'KEYWORD_UNORDERED'` for `searchType`) when omitted.
 */
export type GetAdsRequest = {
  adType?: 'ALL' | 'EMPLOYMENT_ADS' | 'FINANCIAL_PRODUCTS_AND_SERVICES_ADS' | 'HOUSING_ADS' | 'POLITICAL_AND_ISSUE_ADS';
  /** 1-based page index. `getAds` overwrites this with its own counter on every request. */
  page?: number;
  /** Search keywords; also used to name the output file and the log context. */
  query: string;
  searchType?: 'KEYWORD_UNORDERED' | 'KEYWORD_EXACT_PHRASE';
  /** Pagination cursor. `getAds` overwrites this with the previous page's `end_cursor`. */
  cursor?: string;
  contentLanguages?: string[];
  activeStatus?: 'ACTIVE' | 'INACTIVE' | 'ALL';
  mediaType?: 'ALL' | 'IMAGE' | 'MEME' | 'VIDEO' | 'NONE';
  publisherPlatforms?: Platform[];
  country?: string;
  /** Bounds on the ad start date; a missing bound is sent as `null`. */
  startDate?: {
    min?: string; // 2018-06-15
    max?: string; // 2025-06-07
  };
  /** Stop crawling once at least this many distinct pages have been collected. */
  maxRecords?: number;
};
/**
 * Crawler for the public Meta Ad Library search.
 *
 * Flow: load `/ads/library/` once to obtain the `datr` cookie, the anti-CSRF
 * and build tokens embedded in the page, and a session id; then POST
 * paginated GraphQL queries to `/api/graphql/`, collecting one
 * `page_id -> page_name` entry per distinct advertiser page and writing each
 * new page id to a `FileSaver`.
 *
 * Instances keep per-crawl state (`adsData`, `context`), so a single instance
 * should run one `getAds` call at a time.
 */
class MetaAds {
  private request: Request;
  private sessionId?: string;
  /** Tokens scraped from the Ad Library HTML, keyed by their form-field name. */
  private mapQueryParams: Record<string, unknown> = {};
  /** Distinct advertiser pages found during the current crawl. */
  private adsData: Map<number, string> = new Map();
  /** Prefix used in log lines; set to `query: <query>` by `getAds`. */
  private context = '';

  /**
   * @param debug Forwarded to the underlying `Request` client.
   */
  constructor(private debug = false) {
    this.request = new Request(
      {
        baseURL: config.urls.base,
        headers: config.defaultRequestHeaders,
      },
      this.debug
    );
  }

  /** Extracts a printable message from anything that can be thrown. */
  private static errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Forgets every advertiser page collected so far. */
  clearAdsData(): void {
    this.adsData.clear();
  }

  /**
   * Finds the `datr` cookie among the `set-cookie` values and installs it
   * (plus fixed `dpr`/`wd` values) as the client's `Cookie` request header.
   * Does nothing when no `datr` cookie is present.
   */
  private updateCookies(cookies?: string[]): void {
    if (!cookies) return;
    for (const cookie of cookies) {
      if (typeof cookie !== 'string') continue;
      // Keep only the leading `name=value` pair; attributes such as Path/Expires are dropped.
      const cookiePair = cookie.split(';')[0];
      if (cookiePair?.startsWith('datr=')) {
        const cookieValue = `${cookiePair}; dpr=1.53; wd=1920x1080`;
        Logger.info(`[${this.context}] - set cookies: ${cookieValue}`);
        // The HTTP request header is `Cookie` (singular); `Cookies` is not a standard header.
        this.request.setHeaders({ Cookie: cookieValue });
        return;
      }
    }
  }

  /**
   * Scrapes the `ScheduledServerJS` bootstrap script from the Ad Library HTML
   * and copies the tokens needed by GraphQL requests into `mapQueryParams`.
   * Parse failures are logged and leave previously stored values untouched.
   */
  private updateMapQueryParams(data?: string): void {
    if (!data) return;
    const scriptText = data.match(
      /<script type="application\/json"(?:\s+data-content-len="\d+")?\s+data-sjs>\{"require":\[\["ScheduledServerJS","handle"[^<]*(?:<\/script>|$)/
    )?.[0];
    if (!scriptText) return;

    // Strip the opening tag and the (optional) closing tag to leave bare JSON.
    const jsonString = scriptText.replace(/<script type="application\/json"(?:\s+data-content-len="\d+")?\s+data-sjs>/, '').replace(/<\/script>|$/, '');
    try {
      const jsonData = JSON.parse(jsonString);
      const defineData =
        Utils.getLastItemFromArray<Array<{ __bbox: { define?: [string, unknown[], Record<string, unknown>, unknown][] } }>>(jsonData.require?.[0]) || [];
      for (const define of defineData) {
        const box = define?.__bbox?.define || [];
        for (const item of box) {
          const moduleName = item[0];
          // Null-safe: one malformed entry must not abort extraction of the remaining tokens.
          const fields: Record<string, unknown> = item[2] ?? {};
          if (moduleName === 'LSD') {
            this.mapQueryParams['lsd'] = fields['token'] as string;
          } else if (moduleName === 'WebConnectionClassServerGuess') {
            this.mapQueryParams['__ccg'] = fields['connectionClass'] as string;
          } else if (moduleName === 'GetAsyncParamsExtraData') {
            this.mapQueryParams['__aaid'] = (fields['extra_data'] as Record<string, string> | undefined)?.['__aaid'];
          } else if (moduleName === 'SiteData') {
            const dict = fields as Record<string, string>;
            this.mapQueryParams['__hs'] = dict['haste_session'];
            this.mapQueryParams['__rev'] = dict['__spin_r'];
            this.mapQueryParams['__hsi'] = dict['hsi'];
            this.mapQueryParams['__spin_r'] = dict['__spin_r'];
            this.mapQueryParams['__spin_b'] = dict['__spin_b'];
            this.mapQueryParams['__spin_t'] = dict['__spin_t'];
          }
        }
      }
    } catch (err: unknown) {
      Logger.error(`[${this.context}] - Failed to parse script data: ${MetaAds.errorMessage(err)}`);
    }
  }

  /**
   * Pulls the value following the first `sessionId` key out of the raw HTML.
   * Sets `sessionId` to `undefined` when the marker is absent.
   */
  private updateSessionId(data?: string): void {
    // Text after `sessionId` looks like `":"<id>",...`; take what sits between the first ':' and the next ','.
    this.sessionId = data?.split('sessionId')?.[1]?.split(':')?.[1]?.split(',')?.[0]?.replace(/"/g, '');
  }

  /** Loads the Ad Library landing page and refreshes cookie, tokens and session id from it. */
  private async createSession(): Promise<void> {
    const params = {
      active_status: 'all',
      ad_type: 'all',
      country: 'ALL',
      is_targeted_country: 'false',
      media_type: 'all',
    };
    const response = await this.request.send<AxiosResponse>({
      url: '/ads/library/',
      params,
      keepRawResponse: true,
    });
    const cookies = response?.headers?.['set-cookie'];
    this.updateCookies(cookies);
    this.updateMapQueryParams(response?.data);
    this.updateSessionId(response?.data);
  }

  /** Re-creates the session, logging the session id before and after. */
  public async refreshSession(): Promise<void> {
    Logger.info(`[${this.context}] - refresh session, current session id: ${this.sessionId}`);
    await this.createSession();
    Logger.success(`[${this.context}] - refresh session done, new session id: ${this.sessionId}`);
  }

  /**
   * Builds the JSON-encoded GraphQL `variables` for a search request.
   * `cursor` and `startDate` are only included when supplied.
   */
  private buildQueryParamVariables(req: GetAdsRequest): string {
    const variables = {
      activeStatus: req.activeStatus || 'ALL',
      adType: req.adType || 'ALL',
      audienceTimeframe: 'LAST_7_DAYS',
      bylines: [],
      contentLanguages: req.contentLanguages || [],
      // NOTE(review): `countries` stays ['ALL'] even when `req.country` is set — confirm this is intended.
      countries: ['ALL'],
      country: req.country || 'ALL',
      excludedIDs: [],
      fetchPageInfo: false,
      fetchSharedDisclaimers: false,
      isTargetedCountry: false,
      location: null,
      mediaType: req.mediaType || 'ALL',
      multiCountryFilterMode: null,
      pageIDs: [],
      first: 30,
      potentialReachInput: [],
      publisherPlatforms: req.publisherPlatforms || [],
      queryString: req.query,
      ...(req.cursor && { cursor: req.cursor }),
      regions: [],
      searchType: req.searchType || 'KEYWORD_UNORDERED',
      sessionID: this.sessionId || '',
      sortData: null,
      ...(req.startDate && {
        startDate: {
          min: req.startDate?.min || null,
          max: req.startDate?.max || null,
        },
      }),
      viewAllPageID: '0',
    };
    return JSON.stringify(variables);
  }

  /**
   * Builds the url-encoded form body for the GraphQL endpoint.
   * Page 1 uses the "refetch" query; later pages use the pagination query.
   */
  private buildQueryParams(req: GetAdsRequest): string {
    const isFirstPage = req.page === 1;
    const params = {
      doc_id: isFirstPage ? '24456302960624351' : '24394279933540792',
      server_timestamps: 'true',
      fb_api_caller_class: 'RelayModern',
      fb_api_req_friendly_name: isFirstPage ? 'AdLibraryMobileFocusedStateProviderRefetchQuery' : 'AdLibrarySearchPaginationQuery',
      variables: this.buildQueryParamVariables(req),
      __jssesw: '1',
      __spin_t: this.mapQueryParams['__spin_t'],
      __spin_b: this.mapQueryParams['__spin_b'],
      __spin_r: this.mapQueryParams['__spin_r'],
      lsd: this.mapQueryParams['lsd'],
      __comet_req: '1',
      __hsi: this.mapQueryParams['__hsi'],
      __rev: this.mapQueryParams['__rev'],
      __ccg: this.mapQueryParams['__ccg'],
      dpr: '1',
      __hs: this.mapQueryParams['__hs'],
      __req: '17',
      __a: '1',
      __user: '0',
      // NOTE(review): the scraped `__aaid` is stored in mapQueryParams but '0' is always sent — confirm intent.
      __aaid: '0',
      av: '0',
    };
    return qs.stringify(params);
  }

  /**
   * Builds the per-request headers for a GraphQL POST, adding `x-fb-lsd`
   * when an LSD token has been scraped.
   */
  private buildQueryHeaders() {
    const lsd = this.mapQueryParams['lsd'];
    const headers: AxiosRequestConfig['headers'] = {
      'content-type': 'application/x-www-form-urlencoded',
      // The HTTP header is spelled `referer`; `refer` is not recognised by servers.
      referer: 'https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=ALL&is_targeted_country=false&media_type=all',
      accept: '*/*',
      'x-asbd-id': '359341',
      // NOTE(review): stays the refetch name even on pagination requests — confirm the server accepts this.
      'x-fb-friendly-name': 'AdLibraryMobileFocusedStateProviderRefetchQuery',
      'sec-fetch-site': 'same-origin',
      'sec-fetch-mode': 'cors',
      'sec-fetch-dest': 'empty',
      'sec-gpc': '1',
      ...(lsd ? { 'x-fb-lsd': lsd as string } : {}),
    };
    return headers;
  }

  /**
   * Runs one search request and returns its `search_results_connection`.
   *
   * An empty or unparseable response counts as a failure. On failure the
   * session is refreshed and the request repeated until `maxRetries` attempts
   * in total have been made.
   *
   * @param req        Filters plus the page/cursor to fetch.
   * @param maxRetries Total number of attempts allowed, including the first.
   * @returns The results, or `undefined` once every attempt has failed.
   */
  private async queryAds(req: GetAdsRequest, maxRetries = 3): Promise<SearchResults | undefined> {
    try {
      const payload = {
        url: `/api/graphql/`,
        method: 'post',
        headers: this.buildQueryHeaders(),
        data: this.buildQueryParams(req),
      };
      Logger.info(`[${this.context}] - query cursor: ${Utils.truncateLongText(req.cursor ?? '--')}`);
      const response = await this.request.send<string | any>(payload);
      if (!response) throw new Error('Empty response');

      let searchResults: SearchResults | undefined;
      if (typeof response === 'string') {
        // A string body may hold more than one JSON document; rebuild the one labelled for this query.
        const responseJsonString = `{"label": "AdLibraryMobileFocusedStateProvider${response.split('AdLibraryMobileFocusedStateProvider')?.[1]}`;
        const responseJson = JSON.parse(responseJsonString);
        searchResults = responseJson?.data?.ad_library_main?.search_results_connection;
      } else if (typeof response === 'object' && response?.data?.ad_library_main?.search_results_connection) {
        searchResults = response?.data?.ad_library_main?.search_results_connection;
      }

      if (!searchResults || searchResults.count === 0 || (searchResults.edges && searchResults.edges.length === 0)) {
        Logger.info(`[${this.context}] - ${JSON.stringify(searchResults)}`);
        throw new Error('Empty response');
      }
      return searchResults;
    } catch (err: unknown) {
      const message = MetaAds.errorMessage(err);
      const retry = maxRetries - 1;
      if (retry > 0) {
        Logger.error(`[${this.context}] - query ads failed, error: ${message}, retry ${retry} -> retry`);
        await this.refreshSession();
        return await this.queryAds(req, retry);
      }
      Logger.error(`[${this.context}] - query ads failed, error: ${message} -> stop`);
      return;
    }
  }

  /**
   * Records every advertiser page not seen before and writes its id
   * (one per line) to `saver`. Only the first collated result of each edge
   * is inspected; edges without a usable result are skipped.
   */
  private parseAndSaveAds({ edges }: SearchResults, saver: FileSaver, page: number) {
    if (!edges) {
      Logger.info(`[${this.context}] - Not found any ads`);
      return;
    }
    let addCount = 0;
    for (const edge of edges) {
      // Null-safe: a malformed edge must not abort the whole crawl.
      const first: Partial<Ads> = edge?.node?.collated_results?.[0] ?? {};
      const { page_id, page_name } = first;
      if (!page_id || !page_name || this.adsData.has(page_id)) continue;
      this.adsData.set(page_id, page_name);
      saver.write(`${page_id}\n`);
      Logger.success(`[${this.context}] - found ads: page id: ${page_id} - page name: ${page_name}`);
      addCount++;
    }
    Logger.info(`[${this.context}] - page: ${page} - add ${addCount} ads, total ads: ${this.adsData.size}`);
  }

  /**
   * Crawls every result page for `req`, saving distinct page ids to a file
   * named `ads-<query>`.
   *
   * Stops when the server reports no further page, when a request yields no
   * results after retries, or when `req.maxRecords` distinct pages have been
   * collected. Errors are logged rather than thrown.
   */
  public async getAds(req: GetAdsRequest): Promise<void> {
    try {
      const startTime = Date.now();
      Logger.info(`Get ads: ${JSON.stringify(req)}`);
      this.clearAdsData();
      this.context = `query: ${req.query}`;
      const fileSaver = new FileSaver(`ads-${req.query}`);
      try {
        if (!this.sessionId) await this.createSession();
        const info: {
          cursor?: string;
          hasNext: boolean;
          count: number;
          page: number;
        } = {
          hasNext: true,
          count: 0,
          page: 1,
        };
        do {
          const payload = { ...req, cursor: info.cursor, page: info.page };
          const response = await this.queryAds(payload);
          if (!response) {
            Logger.info(`[${this.context}] - No results found.`);
            break;
          }
          if (!info.count) {
            info.count = response.count || 0;
            Logger.info(`[${this.context}] - total found ads: ${info.count}`);
          }
          this.parseAndSaveAds(response, fileSaver, info.page);
          if (req.maxRecords && this.adsData.size >= req.maxRecords) {
            Logger.info(`[${this.context}] - max records reached: ${req.maxRecords} -> stop`);
            break;
          }
          const pageInfo = response.page_info;
          // Continue only when the server both claims a next page and supplies a cursor for it.
          info.hasNext = Boolean(pageInfo?.has_next_page && pageInfo?.end_cursor);
          info.cursor = pageInfo?.end_cursor;
          info.page++;
          if (!info.hasNext) {
            Logger.info(`[${this.context}] - no more ads -> stop, page_info: ${JSON.stringify(response.page_info)}`);
          }
        } while (info.hasNext);
      } finally {
        // Always release the output file, including when the crawl fails midway.
        fileSaver.close();
      }
      Logger.success(`[${this.context}] - get ads done, total ads: ${this.adsData.size}, took: ${Date.now() - startTime}ms`);
    } catch (err: unknown) {
      Logger.error(`Error when get ads of query: ${req.query}, error: ${MetaAds.errorMessage(err)}`);
    }
  }
}

export default MetaAds;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment