Created
July 14, 2024 16:01
-
-
Save keithmcnulty/21e90cceae45717a3e361032a3705630 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import os

import opendatasets as od
import pandas as pd

# Kaggle dataset URL
dataset = 'https://www.kaggle.com/datasets/aashita/nyt-comments/'

# Download the dataset (about 480 MB) using opendatasets.
od.download(dataset)

# The downloaded folder also contains article CSV files, which are not
# needed here. Delete them so that only the comment CSV files remain and
# the '*2017.csv' pattern below matches comment files only.
for f in glob.glob("nyt-comments/Article*"):
    os.remove(f)

# Collect the paths of all 2017 comment CSV files so they can be loaded
# into one single dataframe.
# glob.glob() gives no ordering guarantee (it depends on the file system),
# so sort the paths to make the file order reproducible across runs.
csv_files_2017 = sorted(glob.glob('nyt-comments/*2017.csv'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment