More on Association Rules Using Python

3 minute read

기존에 다뤘던 mlxtend 의 추가 기능을 활용하기 위한 포스팅이다.

import pandas as pd
import numpy as np
import itertools ## needed when building combinations

# Load the market-basket CSV as UTF-8; header='infer' leaves header-row detection to pandas.
mdf = pd.read_csv('D:/★2020_ML_DL_Project/Alchemy/dataset/marketbasket.csv',encoding='UTF8',header='infer')

print(mdf.shape) ## the author reports 1361 transactions (rows)
print(mdf.columns) ## the author reports as many as 255 items (columns)
mdf.head()

(315, 7)
Index(['0', '1', '2', '3', '4', '5', '6'], dtype='object')

# Strip leading/trailing whitespace from every column label, then reassign the cleaned labels.
new_columns = mdf.columns.str.strip().to_list()
mdf.columns = new_columns
# Show the first two rows to check the renamed columns.
mdf.head(2)

	Hair Conditioner	Lemons	Standard coffee	Frozen Chicken Wings	98pct. Fat Free Hamburger	Sugar Cookies	Onions	Deli Ham	Dishwasher Detergent	Beets	...	Lollipops	Plain White Bread	Blueberry Yogurt	Frozen Chicken Thighs	Mixed Vegetables	Souring Pads	Tuna Spread	Toilet Paper	White Wine	Columbian Coffee
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0
1	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	0	0	0	0	0	0

2 rows × 255 columns

from tqdm import tqdm
tqdm.pandas()

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori,association_rules,fpgrowth

연관성분석 하기 좋은 데이터셋으로, 변환시켜주는 라이브러리

cust_id,[item01,item02,item03 ~ item100] 처럼 만들어준다.

본 데이터셋은 이미 변환이 되어 있기 때문에 필요 없다. [2차원 list 또는 array 형태를 입력으로 받아서 변환시킨다.]

te = TransactionEncoder()  # 예시
te_rslt = te.fit(mdf_lst).transform(mdf_lst)  # 예시

mdf.shape

(1361, 255)

# Mine frequent itemsets from the one-hot transaction table with FP-Growth.
# min_support=0.05 is the support threshold passed to fpgrowth; use_colnames=True
# asks for itemsets labelled with column names rather than column indices.
item_set = fpgrowth(mdf,min_support=0.05,use_colnames=True)

# Display the resulting frequent-itemset table.
item_set

def calculate_length(df,idx):
    """Add a ``length_consequent`` column with the size of each row's item collection.

    Args:
        df: DataFrame whose column at position ``idx`` holds sized collections
            (e.g. the frozensets produced by ``association_rules``).
        idx: Zero-based *position* of the column to measure.

    Returns:
        The same ``df`` object, modified in place, with an integer
        ``length_consequent`` column added (or overwritten).
    """
    # Read the column purely by position and assign the whole column at once.
    # The previous per-row loop mixed index labels (df.index / df.at) with
    # positional access (df.iloc), which failed or read the wrong row on any
    # frame whose index is not 0..n-1, and it produced float values.
    df['length_consequent'] = [len(items) for items in df.iloc[:, idx]]
    return df

# Derive association rules from the frequent itemsets, using lift as the
# selection metric with a threshold of 3.0.
# NOTE(review): the trailing "0.53 / 5.6" is the author's own note; its meaning is not stated here.
rule_rslt_lift = association_rules(item_set, metric="lift", min_threshold=3.0) ## 0.53 / 5.6

# Display the resulting rule table.
rule_rslt_lift

	antecedents	consequents	antecedent support	consequent support	support	confidence	lift	leverage	conviction
0	( Eggs)	( 2pct. Milk)	0.122704	0.109478	0.052168	0.425150	3.883414	0.038734	1.549137
1	( 2pct. Milk)	( Eggs)	0.109478	0.122704	0.052168	0.476510	3.883414	0.038734	1.675861
2	( 2pct. Milk)	( White Bread)	0.109478	0.119030	0.051433	0.469799	3.946889	0.038402	1.661576
3	( White Bread)	( 2pct. Milk)	0.119030	0.109478	0.051433	0.432099	3.946889	0.038402	1.568093
4	( Eggs)	( White Bread)	0.122704	0.119030	0.055107	0.449102	3.773010	0.040501	1.599152
5	( White Bread)	( Eggs)	0.119030	0.122704	0.055107	0.462963	3.773010	0.040501	1.633586
6	( White Bread)	( Potato Chips)	0.119030	0.097722	0.051433	0.432099	4.421702	0.039801	1.588793
7	( Potato Chips)	( White Bread)	0.097722	0.119030	0.051433	0.526316	4.421702	0.039801	1.859825

rule_rslt_lift.index

RangeIndex(start=0, stop=8, step=1)

# Measure the column at position 1 of the rule table and store the sizes in
# 'length_consequent'. calculate_length returns the frame it was given, so
# rule_rslt_lift01 and rule_rslt_lift refer to the same object.
rule_rslt_lift01 = calculate_length(rule_rslt_lift,1)

# Display the rule table with the added length column.
rule_rslt_lift01

	antecedents	consequents	antecedent support	consequent support	support	confidence	lift	leverage	conviction	length_consequent
0	( Eggs)	( 2pct. Milk)	0.122704	0.109478	0.052168	0.425150	3.883414	0.038734	1.549137	1.0
1	( 2pct. Milk)	( Eggs)	0.109478	0.122704	0.052168	0.476510	3.883414	0.038734	1.675861	1.0
2	( 2pct. Milk)	( White Bread)	0.109478	0.119030	0.051433	0.469799	3.946889	0.038402	1.661576	1.0
3	( White Bread)	( 2pct. Milk)	0.119030	0.109478	0.051433	0.432099	3.946889	0.038402	1.568093	1.0
4	( Eggs)	( White Bread)	0.122704	0.119030	0.055107	0.449102	3.773010	0.040501	1.599152	1.0
5	( White Bread)	( Eggs)	0.119030	0.122704	0.055107	0.462963	3.773010	0.040501	1.633586	1.0
6	( White Bread)	( Potato Chips)	0.119030	0.097722	0.051433	0.432099	4.421702	0.039801	1.588793	1.0
7	( Potato Chips)	( White Bread)	0.097722	0.119030	0.051433	0.526316	4.421702	0.039801	1.859825	1.0

System Sample