keithmcnulty · July 14, 2024 16:01
diff --git a/load_data.py b/load_data.py
 import pandas as pd
 import os
 import glob
 import opendatasets as od

 # Kaggle location of the New York Times comments dataset
 dataset = 'https://www.kaggle.com/datasets/aashita/nyt-comments/'

 # Fetch the dataset (about 480 MB) via opendatasets
 od.download(dataset)

 # Only the comment CSVs are of interest, so delete every article CSV
 # from the downloaded folder
 article_paths = glob.glob("nyt-comments/Article*")
 for article_path in article_paths:
     os.remove(article_path)

 # Collect the paths of the 2017 comment CSVs, which are to be loaded
 # into one single dataframe
 csv_files_2017 = glob.glob('nyt-comments/*2017.csv')
	import pandas as pd
	import os
	import glob
	import opendatasets as od

	# Kaggle URL of the NYT comments dataset
	dataset = 'https://www.kaggle.com/datasets/aashita/nyt-comments/'

	# Download the dataset (480 MB) with opendatasets
	od.download(dataset)

	# The downloaded folder also contains many article CSV files, which are
	# not needed: remove them so that only the comment CSVs remain.
	for f in glob.glob("nyt-comments/Article*"):
	    # The call must be indented under the loop; left at the loop's own
	    # level, the `for` has no body and Python raises IndentationError.
	    os.remove(f)

	# Paths of the 2017 comment CSV files, to be loaded into one single
	# dataframe
	csv_files_2017 = glob.glob('nyt-comments/*2017.csv')
No results found