apriori-algorithm-python associationrules-algorithms datamining-algorithms pandas

Market_basket_insights

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Load the raw transactions workbook. The path is machine-specific; point it
# at your own copy of the data set before running.
df = pd.read_excel("C:/Users/mades/Downloads/Assignment-1_Data.xlsx")

# Preview the first rows (displayed when this is the last expression of a
# notebook cell; a no-op when run as a plain script).
df.head()

Display basic information about the dataset

# Overall size of the data set as (rows, columns).
print("Number of rows and columns:", df.shape)

# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called on its own; wrapping it in print() would emit a stray "None"
# line after the report.
print("\nData Types and Missing Values:")
df.info()

# Check Missing Values
print("Missing Values:")
print(df.isnull().sum())

# Drop Rows with Missing Values
# The call must sit on its own line: when it shares a line with the comment
# it is part of the comment and never executes. A missing Itemname would
# later break the ', '.join(...) used to build the transactions.
df.dropna(inplace=True)

Convert dataframe into transaction data

# One row per (BillNo, Date) basket: all item names bought on that bill are
# concatenated into a single ', '-separated string.
items_per_bill = df.groupby(['BillNo', 'Date'])['Itemname']
transaction_data = items_per_bill.apply(lambda names: ', '.join(names)).reset_index()

# Drop Unnecessary Columns
# The statements must be on their own lines; on the comment line they are
# never executed. Only the joined item list is used from here on, so the
# grouping keys are removed.
columns_to_drop = ['BillNo', 'Date']
transaction_data.drop(columns=columns_to_drop, inplace=True)

Save the transaction data to a CSV file

# Machine-specific output location; adjust before running elsewhere.
transaction_data_path = "C:/Users/mades/Downloads/Assignment-1_Data.csv"

# index=False keeps the DataFrame's row index out of the file.
transaction_data.to_csv(transaction_data_path, index=False)

Display the first few rows of the transaction data

print("\nTransaction Data for Association Rule Mining:")
print(transaction_data.head())

# (rows, columns) of the basket table; displayed when this is the last
# expression of a notebook cell.
transaction_data.shape

Split the 'Itemname' column into individual items

# Spread each basket string across columns, one item per column
# (expand=True returns a DataFrame instead of a Series of lists).
# NOTE(review): an item name that itself contains ', ' would be split into
# several pieces here — confirm the data has no such names.
items_df = transaction_data['Itemname'].str.split(', ', expand=True)

Concatenate the original DataFrame with the new items DataFrame

# axis=1 places the per-item columns side by side with the existing columns,
# aligned on the row index.
transaction_data = pd.concat([transaction_data, items_df], axis=1)

Drop the original 'Itemname' column

# The joined string is redundant now that every item has its own column.
transaction_data = transaction_data.drop('Itemname', axis=1)

Display the resulting DataFrame

print(transaction_data.head())

# Convert items to boolean columns
# With an empty prefix and separator every dummy column is named after the
# item alone, so an item that occurs in several basket positions produces
# several columns with the same name.
item_dummies = pd.get_dummies(transaction_data, prefix='', prefix_sep='')

# Collapse the same-named columns with max (a logical OR per transaction).
# Grouping the transposed frame by its index replaces
# groupby(level=0, axis=1), whose axis=1 form is deprecated in pandas 2.1+.
df_encoded = item_dummies.T.groupby(level=0).max().T

Save the encoded transaction data to a CSV file

# index=False keeps the row index out of the file, so only item columns are written.
df_encoded.to_csv('transaction_data_encoded.csv', index=False)

Load the encoded transaction data back into a DataFrame

# Rebinds df_encoded to the contents of the CSV file named here.
df_encoded = pd.read_csv('transaction_data_encoded.csv')

from mlxtend.frequent_patterns import apriori, association_rules

Association Rule Mining

# Itemsets whose support is at least 0.007 (0.7% of transactions);
# use_colnames=True reports them by item name instead of column index.
frequent_itemsets = apriori(df_encoded, min_support=0.007, use_colnames=True)

# Keep only the rules whose confidence is at least 0.5.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

Display information of the rules

print("Association Rules:")
print(rules.head())

import matplotlib.pyplot as plt
import seaborn as sns

Plot scatterplot for Support vs. Confidence

plt.figure(figsize=(12, 8))

# Lift is encoded twice, as marker size and as marker colour, so strong rules
# stand out on the support/confidence plane.
sns.scatterplot(
    x="support",
    y="confidence",
    size="lift",
    data=rules,
    hue="lift",
    palette="viridis",
    sizes=(20, 200),
)
plt.title('Market Basket Analysis - Support vs. Confidence (Size = Lift)')
plt.xlabel('Support')
plt.ylabel('Confidence')
# bbox_to_anchor pushes the legend outside the axes so it does not cover points.
plt.legend(title='Lift', loc='upper right', bbox_to_anchor=(1.2, 1))
plt.show()

import plotly.express as px

Convert frozensets to lists for serialization

# Replace each frozenset with a plain list. sorted() is used instead of list()
# so the item order inside every rule is the same on each run (iteration
# order of a set of strings can change between interpreter sessions).
for side in ('antecedents', 'consequents'):
    rules[side] = rules[side].apply(sorted)

Create an interactive scatter plot using plotly express

# Display names for the two plotted columns.
axis_labels = {'support': 'Support', 'confidence': 'Confidence'}

# Lift drives both marker size and colour; hovering shows the rule's consequents.
fig = px.scatter(
    rules,
    x="support",
    y="confidence",
    size="lift",
    color="lift",
    hover_name="consequents",
    title='Market Basket Analysis - Support vs. Confidence',
    labels=axis_labels,
)

Customize the layout

# The original line left the 'Lift' string and the call itself unterminated,
# with the next cell's imports fused into the literal. The call is completed
# here with the showlegend argument and closing parenthesis that belong to it.
fig.update_layout(
    xaxis_title='Support',
    yaxis_title='Confidence',
    coloraxis_colorbar_title='Lift',
    showlegend=True,
)

# Show the interactive plot
fig.show()

import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go

Create a directed graph

# Directed, so every edge can record the antecedent -> consequent direction of a rule.
G = nx.DiGraph()

Add nodes and edges from association rules

for _, row in rules.iterrows():
    # Lists are unhashable, so each itemset is turned into a tuple before it
    # is used as a node key.
    antecedent = tuple(row['antecedents'])
    consequent = tuple(row['consequents'])

    # An itemset that occurs on both sides of different rules keeps whichever
    # colour attribute was assigned last.
    G.add_node(antecedent, color='skyblue')
    G.add_node(consequent, color='orange')

    # One edge per rule, weighted by the rule's support.
    G.add_edge(antecedent, consequent, weight=row['support'])

Set node positions using a spring layout

# Maps every node of G to its (x, y) coordinates; looked up below when
# building the edge and node traces.
pos = nx.spring_layout(G)

Create an interactive plot using plotly

edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    # Each edge contributes its two endpoints followed by None; the None ends
    # the segment so consecutive edges are not joined into one polyline.
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

# Thin grey lines for the edges; hover is disabled on them.
edge_line_style = dict(width=0.5, color='#888')
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=edge_line_style,
    hoverinfo='none',
    mode='lines',
)

# Coordinates are collected in G.nodes() order so that any per-node attribute
# built from the same iteration lines up with them.
node_positions = [pos[node] for node in G.nodes()]
node_x = [x for x, _ in node_positions]
node_y = [y for _, y in node_positions]

# Number of rules touching each itemset (in-degree + out-degree on the
# directed graph), in the same G.nodes() order as node_x / node_y. Without
# this the colour scale enabled below has no data to map.
node_degrees = [G.degree(node) for node in G.nodes()]

# hoverinfo='text' shows nothing unless a text array is supplied.
node_labels = [
    f"{', '.join(map(str, node))} (connections: {degree})"
    for node, degree in zip(G.nodes(), node_degrees)
]

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    hoverinfo='text',
    text=node_labels,
    marker=dict(
        showscale=True,
        colorscale='YlGnBu',
        color=node_degrees,
        size=10,
        colorbar=dict(
            thickness=15,
            # The flat `titleside` attribute is gone from current plotly
            # releases; the nested title.side form works in old and new ones.
            title=dict(text='Node Connections', side='right'),
            xanchor='left',
        ),
    ),
)

Customize the layout

# No legend and zero margins so the graph fills the whole figure.
zero_margins = dict(b=0, l=0, r=0, t=0)
layout = go.Layout(
    showlegend=False,
    hovermode='closest',
    margin=zero_margins,
)

Create the figure

# Edge trace is listed before the node trace; this rebinds `fig` to the
# network graph.
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

Show the interactive graph

fig.show()

import plotly.express as px

Combine antecedents and consequents into a single column for each rule

# astype(str) uses each cell's printed form, so a label reads
# "<antecedents> -> <consequents>" with the containers' brackets included.
rules['rule'] = rules['antecedents'].astype(str) + ' -> ' + rules['consequents'].astype(str)

Create a sunburst chart

# Single-level sunburst: one wedge per rule, sized by lift and coloured by support.
hierarchy = ['rule']
fig = px.sunburst(
    rules,
    path=hierarchy,
    values='lift',
    title='Market Basket Analysis - Sunburst Chart',
    color='support',
    color_continuous_scale='YlGnBu',
)

Customize the layout

# Zero side and bottom margins; a 40-unit top margin is kept above the chart.
sunburst_margins = dict(l=0, r=0, b=0, t=40)
fig.update_layout(margin=sunburst_margins)

Show the interactive plot

# Render the figure currently bound to `fig`.
fig.show()

showlegend=True

)

Show the interactive plot

fig.show()

About

Market basket analysis is a technique used mostly by retailers to identify which products clients purchase together most frequently. This involves analyzing point of sale (POS) transaction data to identify the correlations between different items according to their co-occurrence in the data.

apriori-algorithm-python associationrules-algorithms datamining-algorithms pandas

Languages

Language:Jupyter Notebook 100.0%