import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Load the raw line-item data from the Excel workbook.
df = pd.read_excel("C:/Users/mades/Downloads/Assignment-1_Data.xlsx")
df.head()  # notebook-style preview; has no visible effect when run as a script

print("Number of rows and columns:", df.shape)
print("\nData Types and Missing Values:")
# DataFrame.info() prints its own report and returns None, so it is called
# directly instead of being wrapped in print() (which would emit "None").
df.info()

# Check Missing Values
print("Missing Values:")
print(df.isnull().sum())

# Drop Rows with Missing Values
df.dropna(inplace=True)

# Collapse the line items into one row per (BillNo, Date) pair, joining the
# item names of that pair into a single ', '-separated string.
transaction_data = (
    df.groupby(['BillNo', 'Date'])['Itemname']
    .apply(lambda x: ', '.join(x))
    .reset_index()
)

# Drop Unnecessary Columns
columns_to_drop = ['BillNo', 'Date']
transaction_data.drop(columns=columns_to_drop, inplace=True)

# Persist the per-transaction item strings as CSV.
transaction_data_path = "C:/Users/mades/Downloads/Assignment-1_Data.csv"
transaction_data.to_csv(transaction_data_path, index=False)

print("\nTransaction Data for Association Rule Mining:")
print(transaction_data.head())
transaction_data.shape  # notebook-style preview; no effect in a script
# Split each ', '-separated item string into one column per item position.
items_df = transaction_data['Itemname'].str.split(', ', expand=True)

# Attach the per-position item columns and discard the combined string column.
transaction_data = pd.concat([transaction_data, items_df], axis=1)
transaction_data = transaction_data.drop('Itemname', axis=1)

print(transaction_data.head())

# Convert items to boolean columns.
# get_dummies() with an empty prefix yields one indicator column per
# (position, item) pair, all named after the item, so the same item can appear
# under several identically named columns. Grouping the transposed frame by
# label and taking the max merges those duplicates into one column per item.
# The transpose form replaces groupby(level=0, axis=1), whose `axis` argument
# is deprecated in recent pandas releases.
df_encoded = (
    pd.get_dummies(transaction_data, prefix='', prefix_sep='')
    .T.groupby(level=0)
    .max()
    .T
)

# Round-trip the encoded matrix through CSV (writes to the working directory).
df_encoded.to_csv('transaction_data_encoded.csv', index=False)
df_encoded = pd.read_csv('transaction_data_encoded.csv')
from mlxtend.frequent_patterns import apriori, association_rules

# Find the frequent itemsets in the one-hot encoded transactions, keeping the
# item names (rather than column indices) in the result.
frequent_itemsets = apriori(
    df_encoded,
    use_colnames=True,
    min_support=0.007,
)

# Derive rules from those itemsets, filtered on a confidence threshold of 0.5.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)

print("Association Rules:")
print(rules.head())

import matplotlib.pyplot as plt
import seaborn as sns
# Static bubble chart of the rules: support on x, confidence on y, with lift
# driving both marker size and colour.
plt.figure(figsize=(12, 8))
bubble_ax = sns.scatterplot(
    data=rules,
    x="support",
    y="confidence",
    hue="lift",
    size="lift",
    sizes=(20, 200),
    palette="viridis",
)
bubble_ax.set_title('Market Basket Analysis - Support vs. Confidence (Size = Lift)')
bubble_ax.set_xlabel('Support')
bubble_ax.set_ylabel('Confidence')
# Anchor the legend past the right edge of the axes.
bubble_ax.legend(title='Lift', loc='upper right', bbox_to_anchor=(1.2, 1))
plt.show()

import plotly.express as px
import networkx as nx
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# ---------------------------------------------------------------------------
# Interactive support vs. confidence scatter
# ---------------------------------------------------------------------------
# Convert the antecedent/consequent collections to plain lists before they
# are handed to Plotly.
rules['antecedents'] = rules['antecedents'].apply(list)
rules['consequents'] = rules['consequents'].apply(list)

fig = px.scatter(
    rules,
    x="support",
    y="confidence",
    size="lift",
    color="lift",
    hover_name="consequents",
    title='Market Basket Analysis - Support vs. Confidence',
    labels={'support': 'Support', 'confidence': 'Confidence'},
)

# This call was split in the source: its opening was fused with the imports
# of the following section and its closing lines were stranded at the end of
# the file. It is reassembled here as a single statement.
fig.update_layout(
    xaxis_title='Support',
    yaxis_title='Confidence',
    coloraxis_colorbar_title='Lift',
    showlegend=True,
)
fig.show()

# ---------------------------------------------------------------------------
# Rule network: one node per itemset, one edge per rule
# ---------------------------------------------------------------------------
G = nx.DiGraph()

for _, row in rules.iterrows():
    # Lists are unhashable, so itemsets become tuples to serve as node keys.
    antecedent = tuple(row['antecedents'])
    consequent = tuple(row['consequents'])
    G.add_node(antecedent, color='skyblue')
    G.add_node(consequent, color='orange')
    G.add_edge(antecedent, consequent, weight=row['support'])

pos = nx.spring_layout(G)

# Plotly draws all edges as one line trace; a None entry after each pair of
# endpoints breaks the line so consecutive edges are not joined together.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines',
)

node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]

# The colour bar is titled "Node Connections" and hoverinfo is 'text', so the
# trace needs per-node colour values and hover text; without them the colour
# scale is meaningless and hovering shows nothing.
node_degrees = [G.degree(node) for node in G.nodes()]
node_text = [
    f"{', '.join(map(str, node))} (# of connections: {degree})"
    for node, degree in zip(G.nodes(), node_degrees)
]

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    hoverinfo='text',
    text=node_text,
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        color=node_degrees,
        size=10,
        colorbar=dict(
            thickness=15,
            # `titleside` is deprecated; the side is set on the title itself.
            title=dict(text='Node Connections', side='right'),
            xanchor='left',
        ),
    ),
)

layout = go.Layout(
    showlegend=False,
    hovermode='closest',
    margin=dict(b=0, l=0, r=0, t=0),
)

fig = go.Figure(data=[edge_trace, node_trace], layout=layout)
fig.show()

# ---------------------------------------------------------------------------
# Sunburst of rules: sector size = lift, colour = support
# ---------------------------------------------------------------------------
rules['rule'] = rules['antecedents'].astype(str) + ' -> ' + rules['consequents'].astype(str)

fig = px.sunburst(
    rules,
    path=['rule'],
    values='lift',
    title='Market Basket Analysis - Sunburst Chart',
    color='support',
    color_continuous_scale='YlGnBu',
)

fig.update_layout(
    margin=dict(l=0, r=0, b=0, t=40),
)
fig.show()