nourou4them / python-learning

Learn to code in Python

Geek Repo

GitHub PK Tool

Indkauin

nourou4them opened this issue · comments

import getpass
import os

import pandas as pd

def _rows_for_key(frame, key):
    """Return the rows of *frame* whose column 'A' holds *key*.

    A missing key (NaN/None) never compares equal to itself, so blank keys
    are selected with ``isna()`` instead of ``==``.
    """
    if pd.isna(key):
        return frame[frame['A'].isna()]
    return frame[frame['A'] == key]


def _values_differ(left, right):
    """Return True when two cell values differ; two missing cells count as equal."""
    if pd.isna(left) and pd.isna(right):
        return False
    return left != right


def compare_excel_files(file1_path, file2_path):
    """Compare two Excel files row by row, keyed on column 'A'.

    For every key in file 1 that also exists in file 2, each file-1 row is
    paired with each file-2 row sharing that key, and the first column whose
    values differ is recorded. Keys present in only one file are recorded
    with the first row's values from the file that has them.

    Args:
        file1_path: Path of the first Excel file (passed to ``pd.read_excel``).
        file2_path: Path of the second Excel file (passed to ``pd.read_excel``).

    Returns:
        A tuple ``(rows_in_file1, rows_in_file2, differences)`` where
        ``differences`` is a DataFrame with one row per recorded difference.
        If anything goes wrong (unreadable file, missing column 'A', a column
        of file 1 absent from file 2, ...) the error is printed and
        ``(None, None, None)`` is returned.
    """
    try:
        # Read Excel files
        df1 = pd.read_excel(file1_path)
        df2 = pd.read_excel(file2_path)

        # Compare every column except the key, wherever the key column sits.
        value_columns = [col for col in df1.columns if col != 'A']
        differences = []

        # Keys of file 1: either compare against file 2 or report as missing there.
        for key in df1['A'].unique():
            rows_file1 = _rows_for_key(df1, key)
            rows_file2 = _rows_for_key(df2, key)

            if rows_file2.empty:
                diff_row = {'A': key}
                for col in value_columns:
                    value_file1 = rows_file1[col].iloc[0]
                    diff_row[col] = f"{value_file1} (File 1) | Not Found (File 2)"
                differences.append(diff_row)
                continue

            for _, row_file1 in rows_file1.iterrows():
                for _, row_file2 in rows_file2.iterrows():
                    for col in value_columns:
                        value_file1 = row_file1[col]
                        value_file2 = row_file2[col]
                        if _values_differ(value_file1, value_file2):
                            differences.append({
                                'A': key,
                                col: f"{value_file1} (File 1) vs {value_file2} (File 2)",
                            })
                            break  # Only the first differing column is reported per pair

        # Keys that exist only in file 2.
        for key in df2['A'].unique():
            if not _rows_for_key(df1, key).empty:
                continue
            rows_file2 = _rows_for_key(df2, key)
            diff_row = {'A': key}
            for col in value_columns:
                diff_row[col] = f"Not Found (File 1) | {rows_file2[col].iloc[0]} (File 2)"
            differences.append(diff_row)

        return len(df1), len(df2), pd.DataFrame(differences)
    except Exception as e:
        # Callers rely on the (None, None, None) sentinel rather than an exception.
        print(f"An error occurred: {str(e)}")
        return None, None, None

def write_output_to_txt(output_path, num_lines_file1, num_lines_file2, differences, file1_name, file2_name, username):
    """Write a plain-text comparison report to *output_path*.

    Args:
        output_path: Destination path of the report; an existing file is overwritten.
        num_lines_file1: Row count of the first file (written verbatim).
        num_lines_file2: Row count of the second file (written verbatim).
        differences: DataFrame (or list of row dicts) describing the
            differences, or ``None`` when the comparison itself failed.
        file1_name: Display name of the first input file.
        file2_name: Display name of the second input file.
        username: Name of the user who ran the comparison.

    Returns:
        None. Any error is printed rather than raised.
    """
    try:
        header = (
            f"Username: {username}\n"
            f"Input File 1: {file1_name}\n"
            f"Input File 2: {file2_name}\n"
            f"Number of lines in File 1: {num_lines_file1}\n"
            f"Number of lines in File 2: {num_lines_file2}\n\n"
        )

        if differences is None:
            # None signals a failed comparison; reporting "no differences"
            # here would be misleading.
            body = "Comparison failed; no differences could be computed."
        else:
            df_diff = pd.DataFrame(differences).drop_duplicates()
            if df_diff.empty:
                body = "No differences found between files."
            else:
                body = "Differences between files:\n" + df_diff.to_string(index=False)

        # Explicit encoding keeps the report identical across platforms and
        # avoids failures on non-ASCII cell values.
        with open(output_path, 'w', encoding='utf-8') as file:
            file.write(header + body)
        print("Analysis complete. Output saved to", output_path)
    except Exception as e:
        print(f"An error occurred while writing output: {str(e)}")

if __name__ == "__main__":
    # Interactive entry point: ask for both input files and the report path,
    # run the comparison, then write the text report.
    try:
        # Input file paths
        file1_path = input("Enter path to the first Excel file: ")
        file2_path = input("Enter path to the second Excel file: ")

        # os.path.basename honours the platform's path separators, so
        # Windows-style backslash paths yield just the file name as well.
        file1_name = os.path.basename(file1_path)
        file2_name = os.path.basename(file2_path)

        # Analyze files and get differences
        num_lines_file1, num_lines_file2, differences = compare_excel_files(file1_path, file2_path)

        # Output to txt file
        output_path = input("Enter the path to save the output txt file: ")
        username = getpass.getuser()  # Get username of the user running the program
        write_output_to_txt(output_path, num_lines_file1, num_lines_file2, differences, file1_name, file2_name, username)
    except KeyboardInterrupt:
        print("\nProgram interrupted by user.")
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")