Skip to content

Instantly share code, notes, and snippets.

View vovw's full-sized avatar
:shipit:
experimenting

atharva vovw

:shipit:
experimenting
View GitHub Profile
#!/usr/bin/env python3
import argparse
import os
import glob
import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.decomposition import PCA
README.md: 100%
 7.94k/7.94k [00:00<00:00, 452kB/s]
train-00000-of-00001.parquet: 100%
 2.31M/2.31M [00:00<00:00, 35.3MB/s]
test-00000-of-00001.parquet: 100%
 419k/419k [00:00<00:00, 29.5MB/s]
Generating train split: 100%
 7473/7473 [00:00<00:00, 63339.07 examples/s]
Generating test split: 100%
import ijson
import json
# File locations used by this script. Judging by the names, the first two are
# existing dataset files and the third is the destination for a newly written
# dataset -- TODO confirm against the code that consumes them.
original_dataset_path_1 = "llava_hindi.json"
original_dataset_path_2 = "aadarsh.json"
new_dataset_path = "./new.json"
def process_json_file(filename):
with open(filename, 'rb') as file: