Skip to content

Instantly share code, notes, and snippets.

@curiousest
Created November 25, 2021 17:12
Show Gist options
  • Select an option

  • Save curiousest/a95ac760db3554f34320c357b02638d0 to your computer and use it in GitHub Desktop.

Select an option

Save curiousest/a95ac760db3554f34320c357b02638d0 to your computer and use it in GitHub Desktop.
Compare pandas dataframes
# Written because I couldn't get pandas' built-in DataFrame.compare to do what I wanted:
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.compare.html
import pandas as pd
from typing import Optional, List
def compare_dataframes(df1, df2, drop_indexes=False, ignore_columns: Optional[List]=None):
    """Return the rows unique to each of two dataframes.

    The frames are combined with an outer ``merge`` (no explicit ``on``,
    so pandas joins on the columns the two frames have in common) and the
    merge indicator is used to pick out the one-sided rows.

    Args:
        df1: The first ("left") dataframe.
        df2: The second ("right") dataframe.
        drop_indexes: When truthy, both frames get ``reset_index(drop=True)``
            applied before the merge.
        ignore_columns: Column labels removed from both frames before the
            merge; labels missing from a frame are silently skipped.

    Returns:
        A ``(df1_only, df2_only)`` tuple: the merged rows flagged
        ``left_only`` and ``right_only`` respectively, with the helper
        ``_merge`` indicator column removed.
    """
    frames = [df1, df2]
    if drop_indexes:
        frames = [frame.reset_index(drop=True) for frame in frames]
    if ignore_columns:
        frames = [
            frame.drop(columns=ignore_columns, errors="ignore")
            for frame in frames
        ]
    left, right = frames

    # indicator=True adds a "_merge" column saying which side each row came from.
    combined = left.merge(
        right,
        how="outer",
        indicator=True,
        suffixes=["_old", "_new"],
    )

    def rows_tagged(origin):
        # Keep rows whose indicator equals `origin`, then hide the indicator.
        selected = combined[combined["_merge"] == origin]
        return selected.drop(columns=["_merge"])

    return rows_tagged("left_only"), rows_tagged("right_only")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment