Created
January 23, 2017 08:24
-
-
Save katmutua/42b85779ea353fe4b23b75f402c9bfb4 to your computer and use it in GitHub Desktop.
Excel Differences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env | |
| import pandas as pd | |
| import numpy as np | |
| # Define the diff function to show the changes in each field | |
| def report_diff(x): | |
| return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x) | |
| # We want to be able to easily tell which rows have changes | |
| def has_change(row): | |
| if "--->" in row.to_string(): | |
| return "Y" | |
| else: | |
| return "N" | |
| # Read in both excel files | |
| df1 = pd.read_excel('dataset1.xlsx', 'Sheet1', na_values=['NA']) | |
| df2 = pd.read_excel('dataset2.xlsx', 'Sheet1', na_values=['NA']) | |
| # Create a panel of the two dataframes | |
| diff_panel = pd.Panel(dict(df1=df1,df2=df2)) | |
| #Apply the diff function | |
| diff_output = diff_panel.apply(report_diff, axis=0) | |
| # Flag all the changes | |
| diff_output['has_change'] = diff_output.apply(has_change, axis=1) | |
| #Save the changes to excel but only include the columns we care about | |
| diff_output[(diff_output.has_change == 'Y')].to_excel('data-diff.xlsx',index=False,columns=["indicator name","Year1","Year2","Year3","Year4"]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment