Skip to content

Instantly share code, notes, and snippets.

@hashcott
Created June 30, 2025 10:12
Show Gist options
  • Select an option

  • Save hashcott/c0d1d983ba2f173a7899b51073c794c6 to your computer and use it in GitHub Desktop.

Select an option

Save hashcott/c0d1d983ba2f173a7899b51073c794c6 to your computer and use it in GitHub Desktop.
Crawl the Meta Ad Library
/**
 * Static settings for talking to facebook.com.
 *
 * `urls.base` and `defaultRequestHeaders` are handed to the `Request` helper
 * as its `baseURL` and default `headers` by the MetaAds scraper.
 */
const config = {
urls: {
// Origin that all relative request URLs are resolved against.
base: 'https://www.facebook.com',
},
// Browser-like headers (the user-agent string identifies as desktop Chrome 137
// on Windows) describing a top-level document navigation.
defaultRequestHeaders: {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
'accept-language': 'en-US,en;q=0.9',
'content-type': 'text/html; charset="utf-8"',
'cache-control': 'max-age=0',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
},
// Form parameters for GraphQL calls.
// NOTE(review): the MetaAds class that accompanies this config builds its own
// copies of these values and does not read this object; confirm whether any
// other caller still uses it.
defaultRequestParams: {
__user: '0',
__a: '1',
fb_api_caller_class: 'RelayModern',
fb_api_req_friendly_name: 'AdLibraryMobileFocusedStateProviderRefetchQuery',
server_timestamps: 'true',
__jssesw: '1',
},
};
export default config;
import qs from 'qs';
import { type AxiosResponse, type AxiosRequestConfig } from 'axios';
import { Proxy922s5, ProxyPlainText } from './proxy';
import config from '../config';
import { Logger, Request, Utils } from '../utils';
import { FileSaver } from './saver';
/**
 * One collated ad result. Only the advertiser page's id and name are modelled;
 * those are the two fields MetaAds reads when saving results.
 */
type Ads = {
page_id: number;
page_name: string;
};
/**
 * Shape of `data.ad_library_main.search_results_connection` in a search
 * response, limited to the fields MetaAds consumes.
 */
type SearchResults = {
// Total number of matches reported by the server; may be absent.
count?: number;
// One edge per result group; MetaAds only looks at the first collated result of each.
edges?: {
node: {
collated_results: Ads[];
};
}[];
// Cursor-based pagination state: `end_cursor` is sent back as `cursor` for the next page.
page_info: {
has_next_page: boolean;
end_cursor: string;
};
};
/** Platform identifiers accepted in `GetAdsRequest.publisherPlatforms`. */
type Platform = 'FACEBOOK' | 'INSTAGRAM' | 'AUDIENCE_NETWORK' | 'MESSENGER' | 'WHATSAPP' | 'OCULUS' | 'THREADS';
// https://www.facebook.com/ads/library/api/?source=nav-header
/**
 * Search filters for one ad-library crawl.
 *
 * Only `query` is required. When omitted, `adType`, `activeStatus`, `mediaType`
 * and `country` default to 'ALL', `searchType` to 'KEYWORD_UNORDERED', and the
 * list filters to empty arrays (see `MetaAds.buildQueryParamVariables`).
 */
export type GetAdsRequest = {
adType?: 'ALL' | 'EMPLOYMENT_ADS' | 'FINANCIAL_PRODUCTS_AND_SERVICES_ADS' | 'HOUSING_ADS' | 'POLITICAL_AND_ISSUE_ADS';
// 1-based page counter; `MetaAds.getAds` overwrites it on every iteration.
page?: number;
// Search text; also used to name the output file (`ads-<query>`).
query: string;
searchType?: 'KEYWORD_UNORDERED' | 'KEYWORD_EXACT_PHRASE';
// Pagination cursor; `MetaAds.getAds` overwrites it with the previous page's `end_cursor`.
cursor?: string;
contentLanguages?: string[];
activeStatus?: 'ACTIVE' | 'INACTIVE' | 'ALL';
mediaType?: 'ALL' | 'IMAGE' | 'MEME' | 'VIDEO' | 'NONE';
publisherPlatforms?: Platform[];
country?: string;
// Optional date window; a missing bound is sent as null.
startDate?: {
min?: string; // 2018-06-15
max?: string; // 2025-06-07
};
// Crawl stops once at least this many distinct pages have been collected.
maxRecords?: number;
};
/**
 * Crawls the public Meta Ad Library search and records the distinct advertiser
 * pages (id + name) that match a query.
 *
 * Flow: `getAds` bootstraps an anonymous session by loading `/ads/library/`
 * (cookie, request tokens and session id are scraped from that response), then
 * repeatedly posts to `/api/graphql/`, following `page_info.end_cursor` until
 * the server reports no further page or `maxRecords` is reached. Each new page
 * id is written to a `FileSaver` as one line.
 */
class MetaAds {
  /** Relay query name used for the first page of results. */
  private static readonly FIRST_PAGE_QUERY = 'AdLibraryMobileFocusedStateProviderRefetchQuery';
  /** Relay query name used for every subsequent page. */
  private static readonly NEXT_PAGE_QUERY = 'AdLibrarySearchPaginationQuery';

  private request: Request;
  private sessionId?: string;
  /** Request tokens scraped from the session page (lsd, __spin_*, __hsi, ...). */
  private mapQueryParams: Record<string, unknown> = {};
  /** Distinct pages collected by the current `getAds` run: page id -> page name. */
  private adsData: Map<number, string> = new Map();
  /** Prefix added to log lines so concurrent queries can be told apart. */
  private context: string = '';

  /**
   * @param debug Forwarded unchanged to the `Request` helper.
   */
  constructor(private debug = false) {
    this.request = new Request(
      {
        baseURL: config.urls.base,
        headers: config.defaultRequestHeaders,
      },
      this.debug
    );
  }

  /** Extracts a printable message from whatever value was thrown. */
  private static errorMessage(err: unknown): string {
    if (err instanceof Error) return err.message;
    if (typeof err === 'object' && err !== null && 'message' in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  }

  /**
   * Picks the Relay query name for a request. With no request it falls back to
   * the first-page name.
   */
  private static friendlyName(req?: GetAdsRequest): string {
    return !req || req.page === 1 ? MetaAds.FIRST_PAGE_QUERY : MetaAds.NEXT_PAGE_QUERY;
  }

  /** Forgets every page collected so far. */
  clearAdsData(): void {
    this.adsData.clear();
  }

  /**
   * Finds the `datr` cookie among the `set-cookie` values and installs it
   * (plus fixed `dpr`/`wd` values) as the `Cookie` request header.
   */
  private updateCookies(cookies?: string[]): void {
    if (!cookies) return;
    for (const cookie of cookies) {
      if (typeof cookie !== 'string') continue;
      // Keep only the "name=value" pair; attributes such as Path/Expires are dropped.
      const cookiePair = cookie.split(';')[0];
      if (!cookiePair?.startsWith('datr=')) continue;
      const cookieValue = `${cookiePair}; dpr=1.53; wd=1920x1080`;
      Logger.info(`[${this.context}] - set cookies: ${cookieValue}`);
      // The HTTP request header is `Cookie`; the previous `Cookies` key is not
      // a header servers read, so the session cookie was never actually sent.
      this.request.setHeaders({ Cookie: cookieValue });
      return;
    }
  }

  /**
   * Scrapes request tokens out of the first `ScheduledServerJS` JSON script
   * tag of the session page and stores them in `mapQueryParams`.
   * Parse failures are logged and otherwise ignored.
   */
  private updateMapQueryParams(data?: string): void {
    if (!data) return;
    const scriptText = data.match(
      /<script type="application\/json"(?:\s+data-content-len="\d+")?\s+data-sjs>\{"require":\[\["ScheduledServerJS","handle"[^<]*(?:<\/script>|$)/
    )?.[0];
    if (!scriptText) return;

    // Strip the opening tag and the closing tag (or nothing, if the tag was cut off).
    const jsonString = scriptText
      .replace(/<script type="application\/json"(?:\s+data-content-len="\d+")?\s+data-sjs>/, '')
      .replace(/<\/script>|$/, '');
    try {
      const jsonData = JSON.parse(jsonString);
      const defineData =
        Utils.getLastItemFromArray<Array<{ __bbox: { define?: [string, unknown[], Record<string, unknown>, unknown][] } }>>(jsonData.require?.[0]) || [];
      for (const define of defineData) {
        const box = define?.__bbox?.define || [];
        for (const item of box) {
          const name = item?.[0];
          // A missing payload must not abort extraction of the remaining tokens.
          const payload: Record<string, unknown> = item?.[2] ?? {};
          switch (name) {
            case 'LSD':
              this.mapQueryParams['lsd'] = payload['token'];
              break;
            case 'WebConnectionClassServerGuess':
              this.mapQueryParams['__ccg'] = payload['connectionClass'];
              break;
            case 'GetAsyncParamsExtraData':
              this.mapQueryParams['__aaid'] = (payload['extra_data'] as Record<string, string> | undefined)?.['__aaid'];
              break;
            case 'SiteData':
              this.mapQueryParams['__hs'] = payload['haste_session'];
              this.mapQueryParams['__rev'] = payload['__spin_r'];
              this.mapQueryParams['__hsi'] = payload['hsi'];
              this.mapQueryParams['__spin_r'] = payload['__spin_r'];
              this.mapQueryParams['__spin_b'] = payload['__spin_b'];
              this.mapQueryParams['__spin_t'] = payload['__spin_t'];
              break;
            default:
              break;
          }
        }
      }
    } catch (err: unknown) {
      Logger.error(`[${this.context}] - Failed to parse script data: ${MetaAds.errorMessage(err)}`);
    }
  }

  /**
   * Pulls the value that follows the first `sessionId` key in the page text.
   * The session id becomes `undefined` when the text is missing or has no such key.
   */
  private updateSessionId(data?: string): void {
    if (typeof data !== 'string') {
      this.sessionId = undefined;
      return;
    }
    this.sessionId = data.split('sessionId')[1]?.split(':')[1]?.split(',')[0]?.replace(/"/g, '');
  }

  /** Loads the ad-library landing page and refreshes cookie, tokens and session id from it. */
  private async createSession(): Promise<void> {
    const params = {
      active_status: 'all',
      ad_type: 'all',
      country: 'ALL',
      is_targeted_country: 'false',
      media_type: 'all',
    };
    const response = await this.request.send<AxiosResponse>({
      url: '/ads/library/',
      params,
      keepRawResponse: true,
    });
    this.updateCookies(response?.headers?.['set-cookie']);
    this.updateMapQueryParams(response?.data);
    this.updateSessionId(response?.data);
  }

  /** Re-runs the session bootstrap, logging the session id before and after. */
  public async refreshSession(): Promise<void> {
    Logger.info(`[${this.context}] - refresh session, current session id: ${this.sessionId}`);
    await this.createSession();
    Logger.success(`[${this.context}] - refresh session done, new session id: ${this.sessionId}`);
  }

  /** Serialises the GraphQL `variables` for a search, applying defaults for omitted filters. */
  private buildQueryParamVariables(req: GetAdsRequest): string {
    const variables = {
      activeStatus: req.activeStatus || 'ALL',
      adType: req.adType || 'ALL',
      audienceTimeframe: 'LAST_7_DAYS',
      bylines: [],
      contentLanguages: req.contentLanguages || [],
      // NOTE(review): stays ['ALL'] even when `req.country` is set - confirm this is intended.
      countries: ['ALL'],
      country: req.country || 'ALL',
      excludedIDs: [],
      fetchPageInfo: false,
      fetchSharedDisclaimers: false,
      isTargetedCountry: false,
      location: null,
      mediaType: req.mediaType || 'ALL',
      multiCountryFilterMode: null,
      pageIDs: [],
      first: 30,
      potentialReachInput: [],
      publisherPlatforms: req.publisherPlatforms || [],
      queryString: req.query,
      ...(req.cursor && { cursor: req.cursor }),
      regions: [],
      searchType: req.searchType || 'KEYWORD_UNORDERED',
      sessionID: this.sessionId || '',
      sortData: null,
      ...(req.startDate && {
        startDate: {
          min: req.startDate?.min || null,
          max: req.startDate?.max || null,
        },
      }),
      viewAllPageID: '0',
    };
    return JSON.stringify(variables);
  }

  /**
   * Builds the url-encoded form body for the GraphQL call. Page 1 and later
   * pages use different `doc_id` / friendly-name pairs.
   */
  private buildQueryParams(req: GetAdsRequest): string {
    const isFirstPage = req.page === 1;
    const params = {
      doc_id: isFirstPage ? '24456302960624351' : '24394279933540792',
      server_timestamps: 'true',
      fb_api_caller_class: 'RelayModern',
      fb_api_req_friendly_name: MetaAds.friendlyName(req),
      variables: this.buildQueryParamVariables(req),
      __jssesw: '1',
      __spin_t: this.mapQueryParams['__spin_t'],
      __spin_b: this.mapQueryParams['__spin_b'],
      __spin_r: this.mapQueryParams['__spin_r'],
      lsd: this.mapQueryParams['lsd'],
      __comet_req: '1',
      __hsi: this.mapQueryParams['__hsi'],
      __rev: this.mapQueryParams['__rev'],
      __ccg: this.mapQueryParams['__ccg'],
      dpr: '1',
      __hs: this.mapQueryParams['__hs'],
      __req: '17',
      __a: '1',
      __user: '0',
      // NOTE(review): a scraped `__aaid` is stored in mapQueryParams but '0' is always sent - confirm.
      __aaid: '0',
      av: '0',
    };
    return qs.stringify(params);
  }

  /**
   * Builds the per-request headers for the GraphQL call.
   *
   * @param req When given, `x-fb-friendly-name` follows the same first-page /
   *   pagination choice as the form body so the two never disagree.
   */
  private buildQueryHeaders(req?: GetAdsRequest) {
    const headers: AxiosRequestConfig['headers'] = {
      'content-type': 'application/x-www-form-urlencoded',
      // `referer` is the actual HTTP header name; the former `refer` key was never a recognised header.
      referer: 'https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=ALL&is_targeted_country=false&media_type=all',
      accept: '*/*',
      'x-asbd-id': '359341',
      'x-fb-friendly-name': MetaAds.friendlyName(req),
      'sec-fetch-site': 'same-origin',
      'sec-fetch-mode': 'cors',
      'sec-fetch-dest': 'empty',
      'sec-gpc': '1',
      ...(!!this.mapQueryParams?.lsd && { 'x-fb-lsd': this.mapQueryParams.lsd as string }),
    };
    return headers;
  }

  /**
   * Fetches one page of results.
   *
   * An empty or unparsable response is treated as a failure: the session is
   * refreshed and the call retried. `maxRetries` is the total number of
   * attempts (3 by default).
   *
   * @returns The search connection, or `undefined` once all attempts failed.
   */
  private async queryAds(req: GetAdsRequest, maxRetries = 3): Promise<SearchResults | undefined> {
    try {
      const payload = {
        url: `/api/graphql/`,
        method: 'post',
        headers: this.buildQueryHeaders(req),
        data: this.buildQueryParams(req),
      };
      Logger.info(`[${this.context}] - query cursor: ${Utils.truncateLongText(req.cursor ?? '--')}`);
      const response = await this.request.send<string | any>(payload);
      if (!response) throw new Error('Empty response');

      let searchResults: SearchResults | undefined;
      if (typeof response === 'string') {
        // Text responses are re-assembled into a JSON object starting at the
        // provider label; anything before that marker is discarded.
        const responseJsonString = `{"label": "AdLibraryMobileFocusedStateProvider${response.split('AdLibraryMobileFocusedStateProvider')?.[1]}`;
        const responseJson = JSON.parse(responseJsonString);
        searchResults = responseJson?.data?.ad_library_main?.search_results_connection;
      } else if (typeof response === 'object' && response?.data?.ad_library_main?.search_results_connection) {
        searchResults = response?.data?.ad_library_main?.search_results_connection;
      }

      if (!searchResults || searchResults.count === 0 || (searchResults.edges && searchResults.edges.length === 0)) {
        Logger.info(`[${this.context}] - ${JSON.stringify(searchResults)}`);
        throw new Error('Empty response');
      }
      return searchResults;
    } catch (err: unknown) {
      const message = MetaAds.errorMessage(err);
      const retry = maxRetries - 1;
      if (retry > 0) {
        Logger.error(`[${this.context}] - query ads failed, error: ${message}, retry ${retry} -> retry`);
        await this.refreshSession();
        return await this.queryAds(req, retry);
      }
      Logger.error(`[${this.context}] - query ads failed, error: ${message} -> stop`);
      return;
    }
  }

  /**
   * Records every not-yet-seen page from a result set and writes its id to
   * `saver`, one id per line. Only the first collated result of each edge is used.
   */
  private parseAndSaveAds({ edges }: SearchResults, saver: FileSaver, page: number) {
    if (!edges) {
      Logger.info(`[${this.context}] - Not found any ads`);
      return;
    }
    let addCount = 0;
    for (const edge of edges) {
      const first = edge?.node?.collated_results?.[0];
      if (!first) continue;
      const { page_id, page_name } = first;
      if (!page_id || !page_name || this.adsData.has(page_id)) continue;
      this.adsData.set(page_id, page_name);
      saver.write(`${page_id}\n`);
      Logger.success(`[${this.context}] - found ads: page id: ${page_id} - page name: ${page_name}`);
      addCount++;
    }
    Logger.info(`[${this.context}] - page: ${page} - add ${addCount} ads, total ads: ${this.adsData.size}`);
  }

  /**
   * Runs a full crawl for `req`, writing distinct page ids to `ads-<query>`.
   *
   * `req.page` and `req.cursor` are ignored: paging always starts at page 1
   * and follows the server's cursors. Errors are logged, never thrown, and the
   * saver is closed on every exit path.
   */
  public async getAds(req: GetAdsRequest): Promise<void> {
    try {
      const startTime = Date.now();
      Logger.info(`Get ads: ${JSON.stringify(req)}`);
      this.clearAdsData();
      this.context = `query: ${req.query}`;
      const fileSaver = new FileSaver(`ads-${req.query}`);
      try {
        if (!this.sessionId) await this.createSession();

        let cursor: string | undefined;
        let hasNext = true;
        let count = 0;
        let page = 1;
        do {
          const response = await this.queryAds({ ...req, cursor, page });
          if (!response) {
            Logger.info(`[${this.context}] - No results found.`);
            break;
          }
          if (!count) {
            count = response.count || 0;
            Logger.info(`[${this.context}] - total found ads: ${count}`);
          }
          this.parseAndSaveAds(response, fileSaver, page);
          if (req.maxRecords && this.adsData.size >= req.maxRecords) {
            Logger.info(`[${this.context}] - max records reached: ${req.maxRecords} -> stop`);
            break;
          }
          const pageInfo = response.page_info;
          hasNext = Boolean(pageInfo?.has_next_page && pageInfo?.end_cursor);
          cursor = pageInfo?.end_cursor;
          page++;
          if (!hasNext) {
            Logger.info(`[${this.context}] - no more ads -> stop, page_info: ${JSON.stringify(pageInfo)}`);
          }
        } while (hasNext);
      } finally {
        // Session bootstrap and refresh can throw; release the file either way.
        fileSaver.close();
      }
      Logger.success(`[${this.context}] - get ads done, total ads: ${this.adsData.size}, took: ${Date.now() - startTime}ms`);
    } catch (err: unknown) {
      Logger.error(`Error when get ads of query: ${req.query}, error: ${MetaAds.errorMessage(err)}`);
    }
  }
}
export default MetaAds;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment