This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import requests | |
| import pandas as pd | |
| from airflow.models import DAG | |
| from airflow.operators.python import PythonOperator | |
| from datetime import datetime | |
| default_args = { | |
| 'start_date': datetime(year=2021, month=6, day=20) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def load_data(path: str, ti) -> None:
    """Write the transformed records pulled from XCom to a CSV file.

    Args:
        path: Destination of the CSV file.
        ti: Task instance; its ``xcom_pull`` is queried for the
            ``transformed_data`` key of the ``transform_data`` task.

    Raises:
        ValueError: If the pull yields nothing usable, so the task fails
            loudly instead of writing an empty file.
    """
    # The pull is made with a list of task ids and the first element of the
    # result is the payload of interest.
    pulled = ti.xcom_pull(key='transformed_data', task_ids=['transform_data'])
    if not pulled or pulled[0] is None:
        raise ValueError(
            "no 'transformed_data' XCom available from task 'transform_data'"
        )

    data_df = pd.DataFrame(pulled[0])
    # index=False keeps the DataFrame's row index out of the CSV.
    data_df.to_csv(path, index=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def transform_data(ti) -> None: | |
| data = ti.xcom_pull(key='extracted_data', task_ids=['extract_data'])[0] | |
| transformed_data = [] | |
| for item in data: | |
| transformed_data.append({ | |
| 'sku': item.get("sku", ""), | |
| 'Name': item['name'], | |
| 'Price': item['price'].get("original"), | |
| 'Brand Name': item['brand_name'], | |
| 'Thumbnail': f"https://img01.ztat.net/article/{item['media'][0]['path']}", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_data(url: str, headers: dict, ti, timeout: float = 30.0) -> None:
    """Fetch the article list from ``url`` and push it to XCom.

    Args:
        url: Endpoint requested with HTTP GET.
        headers: HTTP headers sent with the request.
        ti: Task instance; the articles are pushed through its ``xcom_push``
            under the ``extracted_data`` key.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON payload has no ``articles`` entry.
    """
    # Without a timeout, requests would wait indefinitely on a stalled server.
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail on 4xx/5xx instead of trying to parse an error page as JSON.
    res.raise_for_status()

    json_data = json.loads(res.content)['articles']
    ti.xcom_push(key='extracted_data', value=json_data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # coding=UTF-8 | |
| import pickle | |
| import nltk | |
| from nltk.corpus import brown | |
| #TextBlob FastNPExtractor + ConllExtractor | |
| # Textblob | |
| from textblob import TextBlob | |
| from textblob.np_extractors import FastNPExtractor | |
| from textblob.np_extractors import ConllExtractor |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| try: | |
| from flask import app,Flask | |
| from flask_restful import Resource, Api, reqparse | |
| import elasticsearch | |
| from elasticsearch import Elasticsearch | |
| import datetime | |
| import concurrent.futures | |
| import requests | |
| import json | |
| except Exception as e: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from elasticsearch import helpers | |
# Send every action produced by gen(df) through the bulk helper, with a
# request timeout of 300 (presumably seconds; confirm against the client docs).
res = helpers.bulk(
    es,
    gen(df),
    request_timeout=300,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import uuid | |
| def gen(df): | |
| for i in df: | |
| yield{ | |
| "_index" : "my_med", | |
| "_type" : "_doc", | |
| "_id" : uuid.uuid4(), | |
| "_source" : { | |
| #"name": i.get("name"), | |
| "name":i.get("name"), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| setting ={ | |
| "mappings" : { | |
| "properties" : { | |
| "Clean_Uses" : { | |
| "type" : "text", | |
| "fields" : { | |
| "keyword" : { | |
| "type" : "keyword", | |
| "ignore_above" : 256 | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
# Read the cleaned CSV and keep it as a list of per-row dicts ('records'
# orientation); the name `df` is kept for the code that consumes it.
df = pd.read_csv('clean_data.csv').to_dict('records')
NewerOlder