Data Mining Unit 2 Assignment

BP231021 | SandeeP R

II MCA
ELECTIVE V : DATA MINING TECHNIQUES
UNIT 2 - ASSOCIATION RULES

Problem Solving Based on Association Rule Algorithms


Use any transaction dataset, and apply
i. Frequent set algorithm
ii. Apriori algorithm
iii. Partition Algorithm
iv. Pincer Search
v. Dynamic Itemset Counting
Generate association rules.
Record your inferences about the relative merits of one algorithm over another.
In your opinion, which algorithm is the most efficient for the dataset you have
chosen?

1. FREQUENT SET ALGORITHM

from itertools import combinations
from collections import defaultdict


def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


def calculate_support(transactions, candidates):
    """Calculate the support count for candidate itemsets."""
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def frequent_set_algorithm(transactions, min_support):
    """Frequent Set Algorithm to find all frequent itemsets."""
    # Initialize variables
    frequent_itemsets = {}  # Dictionary to store itemsets with their support
    k = 1                   # Start with itemsets of size 1
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )
    while current_itemsets:
        # Calculate support for the current itemsets
        support_count = calculate_support(transactions, current_itemsets)
        # Filter itemsets that meet the minimum support
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        # Add the frequent itemsets to the result
        frequent_itemsets.update(current_frequent)
        # Generate candidates for the next iteration (itemsets of size k+1)
        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the Frequent Set Algorithm
    frequent_itemsets = frequent_set_algorithm(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :

Frequent Itemsets:
{'butter'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer'}: 3
{'butter', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'beer'}: 2
{'milk', 'butter', 'bread'}: 2
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
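
The output above can also be cross-checked against a library implementation. The sketch below is an optional addition (not part of the assignment code) and assumes the pandas and mlxtend packages are installed; it encodes the same four transactions and calls mlxtend's apriori with the support expressed as a fraction of transactions (2 out of 4 = 0.5).

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

transactions = [
    ["milk", "bread", "butter"],
    ["beer", "bread", "butter"],
    ["milk", "beer", "bread"],
    ["milk", "bread", "butter", "beer"],
]

# One-hot encode the transactions into a boolean DataFrame
te = TransactionEncoder()
onehot = te.fit(transactions).transform(transactions)
df = pd.DataFrame(onehot, columns=te.columns_)

# mlxtend expects support as a fraction of the transactions (2/4 = 0.5)
print(apriori(df, min_support=0.5, use_colnames=True))
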
2. APRIORI ALGORITHM

from itertools import combinations


def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


def calculate_support(transactions, candidates):
    """Calculate support count for candidate itemsets."""
    support_count = {itemset: 0 for itemset in candidates}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def apriori(transactions, min_support):
    """Apriori Algorithm to find frequent itemsets."""
    # Initialize variables
    frequent_itemsets = {}  # Store frequent itemsets with their support
    k = 1                   # Start with 1-itemsets
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )

    while current_itemsets:
        # Calculate support for the current itemsets
        support_count = calculate_support(transactions, current_itemsets)

        # Filter itemsets based on the minimum support threshold
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }

        # Add the frequent itemsets to the result
        frequent_itemsets.update(current_frequent)

        # Generate candidates for the next iteration (itemsets of size k+1)
        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the Apriori Algorithm
    frequent_itemsets = apriori(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :

Frequent Itemsets:
{'butter'}: 3
{'beer'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer', 'butter'}: 2
{'butter', 'bread'}: 3
{'milk', 'beer'}: 2
{'beer', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
{'milk', 'butter', 'bread'}: 2
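
As written, the candidate-generation step above joins frequent itemsets but omits the Apriori pruning step, which discards a size-k candidate as soon as any of its (k-1)-subsets is infrequent. A minimal sketch of that check, reusing generate_candidates and combinations from the listing above (the function names below are illustrative additions, not part of the original code):

def has_infrequent_subset(candidate, frequent_prev):
    """Return True if any (k-1)-subset of the candidate was not frequent."""
    k = len(candidate)
    return any(
        frozenset(subset) not in frequent_prev
        for subset in combinations(candidate, k - 1)
    )


def generate_pruned_candidates(frequent_prev, k):
    """Apriori candidate generation with subset-based pruning."""
    candidates = generate_candidates(frequent_prev, k)
    return {c for c in candidates if not has_infrequent_subset(c, frequent_prev)}

Replacing the call to generate_candidates inside the while loop with generate_pruned_candidates leaves the output unchanged on this small dataset, but it reduces the number of candidates whose support must be counted on larger datasets.
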
3. PARTITION ALGORITHM

from itertools import combinations
from collections import defaultdict


# Helper function to generate candidate itemsets
def generate_candidates(itemsets, k):
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


# Helper function to calculate support for itemsets in a partition
def calculate_support(transactions, candidates):
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


# Partition Algorithm implementation
def partition_algorithm(transactions, n_partitions, min_support):
    # Phase I: Divide the transactions into n partitions
    partition_size = len(transactions) // n_partitions
    partitions = [
        transactions[i * partition_size:(i + 1) * partition_size]
        for i in range(n_partitions)
    ]
    if len(transactions) % n_partitions != 0:
        partitions[-1].extend(transactions[n_partitions * partition_size:])  # Handle leftover transactions

    # Phase I: Generate local frequent itemsets for each partition
    local_frequent_itemsets = []
    for partition in partitions:
        k = 1
        current_itemsets = set(
            frozenset([item]) for transaction in partition for item in transaction
        )
        partition_frequent = {}
        while current_itemsets:
            support_count = calculate_support(partition, current_itemsets)
            frequent_itemsets = {
                itemset: count
                for itemset, count in support_count.items()
                if count >= min_support
            }
            if not frequent_itemsets:
                break
            partition_frequent.update(frequent_itemsets)
            k += 1
            current_itemsets = generate_candidates(set(frequent_itemsets.keys()), k)
        local_frequent_itemsets.append(partition_frequent)

    # Merge Phase: Combine local frequent itemsets across partitions
    global_candidates = defaultdict(int)
    for partition_frequent in local_frequent_itemsets:
        for itemset, count in partition_frequent.items():
            global_candidates[itemset] += count

    # Phase II: Validate global candidates against the entire dataset
    final_support_count = calculate_support(transactions, set(global_candidates.keys()))
    final_frequent_itemsets = {
        itemset: count
        for itemset, count in final_support_count.items()
        if count >= min_support
    }

    return final_frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Number of partitions
    n_partitions = 2

    # Minimum support threshold
    min_support = 2

    # Run the Partition Algorithm
    frequent_itemsets = partition_algorithm(transactions, n_partitions, min_support)

    # Print the results
    print("Frequent Itemsets:", frequent_itemsets)

OUTPUT :

Frequent Itemsets: {frozenset({'butter', 'bread'}): 3, frozenset({'butter'}): 3,
frozenset({'milk'}): 3, frozenset({'bread'}): 4, frozenset({'milk', 'bread'}): 3,
frozenset({'beer', 'bread'}): 3, frozenset({'beer'}): 3, frozenset({'milk', 'beer', 'bread'}): 2,
frozenset({'milk', 'beer'}): 2}
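
Note that this output misses some itemsets that are globally frequent, such as {'milk', 'butter'} and {'beer', 'butter'} (each with support 2): because min_support is applied unchanged inside each two-transaction partition, an itemset that occurs only once per partition never becomes a global candidate. The standard Partition algorithm avoids this by using the same support fraction, rather than the same absolute count, within each partition. A minimal sketch of that adjustment, using the variable names from the listing above:

import math

# Per-partition threshold proportional to partition size (ceil(2 * 2/4) = 1 here),
# used in place of min_support when filtering the local frequent itemsets
local_min_support = math.ceil(min_support * len(partition) / len(transactions))

With this change, any itemset that meets the support threshold over the whole dataset is guaranteed to be locally frequent in at least one partition, so the Phase II validation pass cannot miss it.
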
4. PINCER SEARCH ALGORITHM

from itertools import combinations
from collections import defaultdict


def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


def calculate_support(transactions, candidates):
    """Calculate the support of candidates in the transactions."""
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def pincer_search(transactions, min_support):
    """Pincer-Search Algorithm implementation."""
    # Initialize variables
    frequent_itemsets = {}       # Store frequent itemsets with their support
    infrequent_itemsets = set()  # Store infrequent itemsets
    global_support = {}          # Keep track of support for all itemsets

    # Generate initial 1-item candidates
    k = 1
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )

    while current_itemsets:
        # Calculate support for current candidates
        support_count = calculate_support(transactions, current_itemsets)
        global_support.update(support_count)

        # Split into frequent and infrequent itemsets
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        frequent_itemsets.update(current_frequent)
        infrequent_itemsets.update(
            itemset for itemset, count in support_count.items() if count < min_support
        )

        # Check for termination: if no frequent itemsets, stop
        if not current_frequent:
            break

        # Generate next candidates using frequent itemsets
        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)

        # Prune candidates containing infrequent subsets
        current_itemsets = {
            candidate
            for candidate in current_itemsets
            if not any(
                frozenset(subset) in infrequent_itemsets
                for subset in combinations(candidate, k - 1)
            )
        }

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the Pincer-Search Algorithm
    frequent_itemsets = pincer_search(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :

Frequent Itemsets:
{'butter'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer'}: 3
{'butter', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'beer'}: 2
{'milk', 'butter', 'bread'}: 2
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
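
The listing above implements only the bottom-up, Apriori-style half of Pincer-Search. The characteristic top-down half maintains a Maximum Frequent Candidate Set (MFCS), initialised to the union of all frequent 1-items, and splits its elements whenever an infrequent itemset is found, so that maximal frequent itemsets can be discovered early. The sketch below shows only that MFCS update step; it is an illustrative addition under these assumptions, not part of the original code.

def update_mfcs(mfcs, infrequent):
    """Split every maximal candidate that contains the infrequent itemset."""
    new_mfcs = set()
    for m in mfcs:
        if infrequent.issubset(m):
            # Drop one item of the infrequent set at a time
            for item in infrequent:
                new_mfcs.add(m - frozenset([item]))
        else:
            new_mfcs.add(m)
    # Keep only maximal elements
    return {m for m in new_mfcs if not any(m < other for other in new_mfcs)}


# On the sample data, {'milk', 'beer', 'butter'} has support 1 and is infrequent,
# so the single initial MFCS element splits into three maximal 3-itemsets:
mfcs = {frozenset({"milk", "bread", "butter", "beer"})}
print(update_mfcs(mfcs, frozenset({"milk", "beer", "butter"})))
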
5. DYNAMIC ITEMSET COUNTING

from itertools import combinations


def calculate_support(transactions, candidates):
    """Calculate the support count for a set of candidate itemsets."""
    support_count = {itemset: 0 for itemset in candidates}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def dic_algorithm(transactions, min_support):
    """Dynamic Itemset Counting (DIC) Algorithm."""
    # Initialize variables
    frequent_itemsets = {}  # Store frequent itemsets with their support
    k = 1                   # Current size of itemsets
    active_candidates = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )
    inactive_candidates = set()  # Itemsets found infrequent in earlier passes

    while active_candidates:
        # Calculate support for active candidates
        support_count = calculate_support(transactions, active_candidates)

        # Filter frequent itemsets from active candidates
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        # Add the frequent itemsets to the results
        frequent_itemsets.update(current_frequent)

        # Move non-frequent active candidates to inactive
        inactive_candidates.update(
            itemset for itemset, count in support_count.items() if count < min_support
        )

        # Generate new candidates to activate dynamically
        new_candidates = set()
        for itemset in current_frequent:
            for other in frequent_itemsets.keys():
                if len(itemset.union(other)) == k + 1:
                    new_candidate = itemset.union(other)
                    if all(
                        frozenset(subset) in frequent_itemsets
                        for subset in combinations(new_candidate, k)
                    ):
                        new_candidates.add(new_candidate)

        # Activate new candidates
        active_candidates = new_candidates
        k += 1

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the DIC Algorithm
    frequent_itemsets = dic_algorithm(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :
Frequent Itemsets:
{'butter'}: 3
{'beer'}: 3
{'milk'}: 3
{'bread'}: 4
{'butter', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'beer'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
{'milk', 'butter', 'bread'}: 2
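
The task also asks for association rules, which none of the listings above derive. The following is a minimal sketch that turns any of the frequent-itemset dictionaries produced above into rules; the minimum confidence of 0.7 is an illustrative choice, and the confidence of a rule X -> Y is computed as support(X union Y) / support(X).

from itertools import combinations


def generate_rules(frequent_itemsets, min_confidence=0.7):
    """Derive rules X -> Y from a dict mapping frozensets to support counts."""
    rules = []
    for itemset, support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue
        for size in range(1, len(itemset)):
            for antecedent in combinations(itemset, size):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent
                antecedent_support = frequent_itemsets.get(antecedent)
                if antecedent_support:
                    confidence = support / antecedent_support
                    if confidence >= min_confidence:
                        rules.append((set(antecedent), set(consequent), confidence))
    return rules


for antecedent, consequent, confidence in generate_rules(frequent_itemsets):
    print(f"{antecedent} -> {consequent} (confidence = {confidence:.2f})")

For example, on this dataset the rule {'butter'} -> {'bread'} has confidence 3/3 = 1.0, since every transaction containing butter also contains bread.
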
