Pandas
nourou4them opened this issue · comments
nourou4them commented
`import pandas as pd
def _load_frame(source):
    """Return *source* as a DataFrame, reading it with ``pd.read_excel`` if needed."""
    if isinstance(source, pd.DataFrame):
        return source
    return pd.read_excel(source)


def _values_match(left, right):
    """Return True when two cell values are equal; two missing values count as equal."""
    # NaN != NaN in Python, so a cell that is empty in both files would
    # otherwise be reported as a difference.
    if pd.isna(left) and pd.isna(right):
        return True
    return left == right


def compare_excel_files(file1_path, file2_path):
    """Compare two Excel sheets row by row, matching rows on column ``'A'``.

    Args:
        file1_path: Path to the first Excel file (anything ``pd.read_excel``
            accepts), or an already-loaded ``DataFrame``.
        file2_path: Path to the second Excel file, or an already-loaded
            ``DataFrame``.

    Returns:
        A tuple ``(rows_in_file1, rows_in_file2, differences)``.
        ``differences`` is a ``DataFrame`` with one row per key whose values
        differ in at least one shared column. Each row holds the key under
        ``'A'`` and, for every shared column, either ``"<value> (OK)"`` or
        ``"<value1> (File 1) vs <value2> (File 2)"``. It is empty when nothing
        differs.

    Notes:
        Rows are paired with ``pd.merge(..., on='A')`` using its default join,
        so only keys present in both inputs are compared. Columns that exist
        in only one input are not compared.
    """
    df1 = _load_frame(file1_path)
    df2 = _load_frame(file2_path)

    merged_df = pd.merge(df1, df2, on='A', suffixes=('_file1', '_file2'))

    # The merge only appends suffixes to columns present in BOTH frames, so
    # restrict the comparison to those; others have no '<col>_file1' entry.
    file2_columns = set(df2.columns)
    shared_columns = [
        col for col in df1.columns if col != 'A' and col in file2_columns
    ]

    differences = []
    for _, row in merged_df.iterrows():
        diff_row = {'A': row['A']}
        has_difference = False
        for col in shared_columns:
            value_file1 = row[f"{col}_file1"]
            value_file2 = row[f"{col}_file2"]
            if _values_match(value_file1, value_file2):
                diff_row[col] = f"{value_file1} (OK)"
            else:
                diff_row[col] = f"{value_file1} (File 1) vs {value_file2} (File 2)"
                has_difference = True
        # Track mismatches with a flag rather than scanning the formatted
        # strings: the key value under 'A' may be non-string (a substring test
        # would raise) or may itself contain the marker text.
        if has_difference:
            differences.append(diff_row)

    return len(df1), len(df2), pd.DataFrame(differences)
def write_output_to_txt(output_path, num_lines_file1, num_lines_file2, differences):
    """Write the comparison report to a text file.

    Args:
        output_path: Destination path; the file is created or overwritten.
        num_lines_file1: Row count of the first file, written in the header.
        num_lines_file2: Row count of the second file, written in the header.
        differences: ``DataFrame`` of differing rows. When empty, a
            "no differences" line is written instead of a table.
    """
    # Explicit UTF-8: cell values may contain non-ASCII text, and the
    # platform default encoding cannot always represent it.
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(f"Number of lines in File 1: {num_lines_file1}\n")
        file.write(f"Number of lines in File 2: {num_lines_file2}\n\n")
        if differences.empty:
            file.write("No differences found between files.")
        else:
            file.write("Differences between files:\n")
            file.write(differences.to_string(index=False))
def main():
    """Prompt for two Excel files and an output path, then write the comparison report."""
    # Input file paths
    file1_path = input("Enter path to the first Excel file: ")
    file2_path = input("Enter path to the second Excel file: ")

    # Analyze files and get differences
    num_lines_file1, num_lines_file2, differences = compare_excel_files(file1_path, file2_path)

    # Output to txt file
    output_path = input("Enter the path to save the output txt file: ")
    write_output_to_txt(output_path, num_lines_file1, num_lines_file2, differences)
    print("Analysis complete. Output saved to", output_path)


# The original guard read `if name == "main":`, which raises NameError; the
# dunder underscores are required for the script-entry check to work.
if __name__ == "__main__":
    main()
`