- запрос на
https://api.ozon.ru/composer-api.bx/page/json/v1?url=/products/[тут ID продукта]/?layout_container=appPdpPage3&layout_page_index=3 - в узле "layout" найти узел с параметром component=seller
- из найденного узла взять stateId (например, seller-814807-appPdpPage3-3)
- в узле pdp найти узел со следующим полным путём (xpath): /pdp/seller/seller-814807-appPdpPage3-3; последний сегмент пути — это stateId из предыдущего пункта
- найденный узел и будет информацией о продавце
| <!DOCTYPE html><html itemscope="" itemtype="http://schema.org/Place" lang="en-RU"> <head> <link href="/maps/_/js/k=maps.m.en.GBzd5ZChrUs.es5.O/m=sc2,per,mo,lp,ep,ti,ds,stx,dwi,enr,pwd,dw,plm,log,b/am=BgBGAxI/rt=j/d=1/rs=ACT90oEgzA4NjNg7VA4qKaAf-t7UWDH1Ng?wli=m.vOb3cCZHKaI.loadSv.O%3A%3B" as="script" rel="preload" type="application/javascript" nonce="U_IB5RFP8128Yb-UrmQ6Dg"> <link href="/maps/preview/opensearch.xml?hl=en" title="Google Maps" rel="search" type="application/opensearchdescription+xml"> <title> Google Maps </title> <meta content="Find local businesses, view maps and get driving directions in Google Maps." name="Description"> <meta content="Anm+hhtuh7NJguqSnXHEAIqqMaV+GXCks8WYXHJKF7l6AeYMj+wO+fi9OdDqFnJTg9t0492DykVxx4jpvFbxnA8AAABseyJvcmlnaW4iOiJodHRwczovL2dvb2dsZS5jb206NDQzIiwiZmVhdHVyZSI6IlByaXZhY3lTYW5kYm94QWRzQVBJcyIsImV4cGlyeSI6MTY5NTE2Nzk5OSwiaXNTdWJkb21haW4iOnRydWV9" http-equiv="origin-trial"> <meta content="initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0, user-scalable=no" name= |
curl --location --request POST 'https://kiev.prom.ua/graphql'
--header 'Content-Type: application/json'
--data-raw '[{"variables":{"categoryId":0,"target":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"PromoPanelQuery","query":"query PromoPanelQuery($categoryId: Intu0021, $target: Stringu0021, $path: Stringu0021) {\n promoPanel(category_id: $categoryId, target: $target, path: $path) {\n isAvailable\n url\n data {\n id\n image\n text\n textColor\n gradientStart\n gradientStop\n url\n urlTarget\n __typename\n }\n __typename\n }\n}\n"},{"variables":{"pageName":"company-listing-page","path":"/c197461-internet-magazin-grand.html"},"extensions":{},"operationName":"ListingBannerQuery","query":"query ListingBannerQuery($currentCategoryId: Int, $pageName: Stringu0021, $path: Stringu0021) {\n listingBanner(category_id: $currentCategoryId, target: $pageName, path: $path) {\n isAvailable\n url\
| (scrapy) developer@ip-172-31-36-21:~/cosmetics_catalogs$ python test.py | |
| Traceback (most recent call last): | |
| File "test.py", line 13, in <module> | |
| asyncio.run(main()) | |
| File "/usr/lib/python3.8/asyncio/runners.py", line 44, in run | |
| return loop.run_until_complete(main) | |
| File "/usr/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete | |
| return future.result() | |
| File "test.py", line 7, in main | |
| browser = await browser_type.launch() |
| def process_request(self, request, spider): | |
| cookiejarkey = request.meta.get('proxy') or request.meta.get("cookiejar") | |
| proxy = request.meta.get('proxy') | |
| token = self.tokens.get(cookiejarkey) | |
| # req = request.copy() | |
| if token: | |
| s = '' | |
| session = requests.Session() | |
| headers = self.headers | |
| headers['User-Agent'] = self.ua |
| import os | |
| import logging | |
| from scrapy.utils.project import get_project_settings | |
| from scrapy.utils.log import configure_logging | |
| from notifiers.logging import NotificationHandler | |
| from os import getenv | |
| from dotenv import load_dotenv | |
| from datetime import datetime | |
| load_dotenv() |
| from itemadapter import ItemAdapter | |
| from shutterstock.db_utils import db_handle, CatalogModel | |
| from scrapy.pipelines.images import ImagesPipeline | |
| from scrapy import Request | |
| import json | |
| class ShutterstockImagePipeline(ImagesPipeline): | |
| def get_media_requests(self, item, info): | |
| url = ItemAdapter(item).get('thumb_url') |
| # -*- coding: utf-8 -*- | |
| # Define your item pipelines here | |
| # | |
| # Don't forget to add your pipeline to the ITEM_PIPELINES setting | |
| # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html | |
| from rssparser.db_utils import * | |
| from rssparser.items import * | |
| import logging |