Data Mining Unit 2 Assignment

BP231021 | SandeeP R

II MCA
ELECTIVE V : DATA MINING TECHNIQUES
UNIT 2 - ASSOCIATION RULES

Problem Solving Based on Association Rule Algorithms


Use any transaction dataset, and apply
i. Frequent set algorithm
ii. Apriori algorithm
iii. Partition Algorithm
iv. Pincer Search
v. Dynamic Itemset Counting
Generate association rules.
Record your inferences about the relative merits of one algorithm over another.
In your opinion, which algorithm is the most efficient for the dataset you have
chosen?

1. FREQUENT SET ALGORITHM

from itertools import combinations
from collections import defaultdict


def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


def calculate_support(transactions, candidates):
    """Calculate the support count for candidate itemsets."""
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def frequent_set_algorithm(transactions, min_support):
    """Frequent Set Algorithm to find all frequent itemsets."""
    # Initialize variables
    frequent_itemsets = {}  # Dictionary to store itemsets with their support
    k = 1                   # Start with itemsets of size 1
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )
    while current_itemsets:
        # Calculate support for the current itemsets
        support_count = calculate_support(transactions, current_itemsets)
        # Filter itemsets that meet the minimum support
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        # Add the frequent itemsets to the result
        frequent_itemsets.update(current_frequent)
        # Generate candidates for the next iteration (itemsets of size k+1)
        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the Frequent Set Algorithm
    frequent_itemsets = frequent_set_algorithm(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :

Frequent Itemsets:
{'butter'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer'}: 3
{'butter', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'beer'}: 2
{'milk', 'butter', 'bread'}: 2
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
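
The output above can also be cross-checked against a library implementation. The sketch below is an optional addition (not part of the assignment code) and assumes the pandas and mlxtend packages are installed; it encodes the same four transactions and calls mlxtend's apriori with the support expressed as a fraction of transactions (2 out of 4 = 0.5).

import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

transactions = [
    ["milk", "bread", "butter"],
    ["beer", "bread", "butter"],
    ["milk", "beer", "bread"],
    ["milk", "bread", "butter", "beer"],
]

# One-hot encode the transactions into a boolean DataFrame
te = TransactionEncoder()
onehot = te.fit(transactions).transform(transactions)
df = pd.DataFrame(onehot, columns=te.columns_)

# mlxtend expects support as a fraction of the transactions (2/4 = 0.5)
print(apriori(df, min_support=0.5, use_colnames=True))
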
2. APRIORI ALGORITHM

from itertools import combinations


def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


def calculate_support(transactions, candidates):
    """Calculate support count for candidate itemsets."""
    support_count = {itemset: 0 for itemset in candidates}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def apriori(transactions, min_support):
    """Apriori Algorithm to find frequent itemsets."""
    # Initialize variables
    frequent_itemsets = {}  # Store frequent itemsets with their support
    k = 1                   # Start with 1-itemsets
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )

    while current_itemsets:
        # Calculate support for the current itemsets
        support_count = calculate_support(transactions, current_itemsets)

        # Filter itemsets based on the minimum support threshold
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }

        # Add the frequent itemsets to the result
        frequent_itemsets.update(current_frequent)

        # Generate candidates for the next iteration (itemsets of size k+1)
        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the Apriori Algorithm
    frequent_itemsets = apriori(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :

Frequent Itemsets:
{'butter'}: 3
{'beer'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer', 'butter'}: 2
{'butter', 'bread'}: 3
{'milk', 'beer'}: 2
{'beer', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
{'milk', 'butter', 'bread'}: 2
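
As written, the candidate-generation step above joins frequent itemsets but omits the Apriori pruning step, which discards a size-k candidate as soon as any of its (k-1)-subsets is infrequent. A minimal sketch of that check, reusing generate_candidates and combinations from the listing above (the function names below are illustrative additions, not part of the original code):

def has_infrequent_subset(candidate, frequent_prev):
    """Return True if any (k-1)-subset of the candidate was not frequent."""
    k = len(candidate)
    return any(
        frozenset(subset) not in frequent_prev
        for subset in combinations(candidate, k - 1)
    )


def generate_pruned_candidates(frequent_prev, k):
    """Apriori candidate generation with subset-based pruning."""
    candidates = generate_candidates(frequent_prev, k)
    return {c for c in candidates if not has_infrequent_subset(c, frequent_prev)}

Replacing the call to generate_candidates inside the while loop with generate_pruned_candidates leaves the output unchanged on this small dataset, but it reduces the number of candidates whose support must be counted on larger datasets.
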
3. PARTITION ALGORITHM

from itertools import combinations
from collections import defaultdict


# Helper function to generate candidate itemsets
def generate_candidates(itemsets, k):
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


# Helper function to calculate support for itemsets in a partition
def calculate_support(transactions, candidates):
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


# Partition Algorithm implementation
def partition_algorithm(transactions, n_partitions, min_support):
    # Phase I: Divide the transactions into n partitions
    partition_size = len(transactions) // n_partitions
    partitions = [
        transactions[i * partition_size:(i + 1) * partition_size]
        for i in range(n_partitions)
    ]
    if len(transactions) % n_partitions != 0:
        partitions[-1].extend(transactions[n_partitions * partition_size:])  # Handle leftover transactions

    # Phase I: Generate local frequent itemsets for each partition
    local_frequent_itemsets = []
    for partition in partitions:
        k = 1
        current_itemsets = set(
            frozenset([item]) for transaction in partition for item in transaction
        )
        partition_frequent = {}
        while current_itemsets:
            support_count = calculate_support(partition, current_itemsets)
            frequent_itemsets = {
                itemset: count
                for itemset, count in support_count.items()
                if count >= min_support
            }
            if not frequent_itemsets:
                break
            partition_frequent.update(frequent_itemsets)
            k += 1
            current_itemsets = generate_candidates(set(frequent_itemsets.keys()), k)
        local_frequent_itemsets.append(partition_frequent)

    # Merge Phase: Combine local frequent itemsets across partitions
    global_candidates = defaultdict(int)
    for partition_frequent in local_frequent_itemsets:
        for itemset, count in partition_frequent.items():
            global_candidates[itemset] += count

    # Phase II: Validate global candidates against the entire dataset
    final_support_count = calculate_support(transactions, set(global_candidates.keys()))
    final_frequent_itemsets = {
        itemset: count
        for itemset, count in final_support_count.items()
        if count >= min_support
    }

    return final_frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Number of partitions
    n_partitions = 2

    # Minimum support threshold
    min_support = 2

    # Run the Partition Algorithm
    frequent_itemsets = partition_algorithm(transactions, n_partitions, min_support)

    # Print the results
    print("Frequent Itemsets:", frequent_itemsets)

OUTPUT :

Frequent Itemsets: {frozenset({'butter', 'bread'}): 3, frozenset({'butter'}): 3,
frozenset({'milk'}): 3, frozenset({'bread'}): 4, frozenset({'milk', 'bread'}): 3,
frozenset({'beer', 'bread'}): 3, frozenset({'beer'}): 3, frozenset({'milk', 'beer', 'bread'}): 2,
frozenset({'milk', 'beer'}): 2}
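
Note that this output misses some itemsets that are globally frequent, such as {'milk', 'butter'} and {'beer', 'butter'} (each with support 2): because min_support is applied unchanged inside each two-transaction partition, an itemset that occurs only once per partition never becomes a global candidate. The standard Partition algorithm avoids this by using the same support fraction, rather than the same absolute count, within each partition. A minimal sketch of that adjustment, using the variable names from the listing above:

import math

# Per-partition threshold proportional to partition size (ceil(2 * 2/4) = 1 here),
# used in place of min_support when filtering the local frequent itemsets
local_min_support = math.ceil(min_support * len(partition) / len(transactions))

With this change, any itemset that meets the support threshold over the whole dataset is guaranteed to be locally frequent in at least one partition, so the Phase II validation pass cannot miss it.
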
4. PINCER SEARCH ALGORITHM

from itertools import combinations
from collections import defaultdict


def generate_candidates(itemsets, k):
    """Generate candidate itemsets of size k."""
    return set(
        frozenset(a.union(b)) for a in itemsets for b in itemsets if len(a.union(b)) == k
    )


def calculate_support(transactions, candidates):
    """Calculate the support of candidates in the transactions."""
    support_count = defaultdict(int)
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def pincer_search(transactions, min_support):
    """Pincer-Search Algorithm implementation."""
    # Initialize variables
    frequent_itemsets = {}       # Store frequent itemsets with their support
    infrequent_itemsets = set()  # Store infrequent itemsets
    global_support = {}          # Keep track of support for all itemsets

    # Generate initial 1-item candidates
    k = 1
    current_itemsets = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )

    while current_itemsets:
        # Calculate support for current candidates
        support_count = calculate_support(transactions, current_itemsets)
        global_support.update(support_count)

        # Split into frequent and infrequent itemsets
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        frequent_itemsets.update(current_frequent)
        infrequent_itemsets.update(
            itemset for itemset, count in support_count.items() if count < min_support
        )

        # Check for termination: if no frequent itemsets, stop
        if not current_frequent:
            break

        # Generate next candidates using frequent itemsets
        k += 1
        current_itemsets = generate_candidates(set(current_frequent.keys()), k)

        # Prune candidates containing infrequent subsets
        current_itemsets = {
            candidate
            for candidate in current_itemsets
            if not any(
                frozenset(subset) in infrequent_itemsets
                for subset in combinations(candidate, k - 1)
            )
        }

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the Pincer-Search Algorithm
    frequent_itemsets = pincer_search(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :

Frequent Itemsets:
{'butter'}: 3
{'milk'}: 3
{'bread'}: 4
{'beer'}: 3
{'butter', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'beer'}: 2
{'milk', 'butter', 'bread'}: 2
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
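
The listing above implements only the bottom-up, Apriori-style half of Pincer-Search. The characteristic top-down half maintains a Maximum Frequent Candidate Set (MFCS), initialised to the union of all frequent 1-items, and splits its elements whenever an infrequent itemset is found, so that maximal frequent itemsets can be discovered early. The sketch below shows only that MFCS update step; it is an illustrative addition under these assumptions, not part of the original code.

def update_mfcs(mfcs, infrequent):
    """Split every maximal candidate that contains the infrequent itemset."""
    new_mfcs = set()
    for m in mfcs:
        if infrequent.issubset(m):
            # Drop one item of the infrequent set at a time
            for item in infrequent:
                new_mfcs.add(m - frozenset([item]))
        else:
            new_mfcs.add(m)
    # Keep only maximal elements
    return {m for m in new_mfcs if not any(m < other for other in new_mfcs)}


# On the sample data, {'milk', 'beer', 'butter'} has support 1 and is infrequent,
# so the single initial MFCS element splits into three maximal 3-itemsets:
mfcs = {frozenset({"milk", "bread", "butter", "beer"})}
print(update_mfcs(mfcs, frozenset({"milk", "beer", "butter"})))
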
5. DYNAMIC ITEMSET COUNTING

from itertools import combinations


def calculate_support(transactions, candidates):
    """Calculate the support count for a set of candidate itemsets."""
    support_count = {itemset: 0 for itemset in candidates}
    for transaction in transactions:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_count[candidate] += 1
    return support_count


def dic_algorithm(transactions, min_support):
    """Dynamic Itemset Counting (DIC) Algorithm."""
    # Initialize variables
    frequent_itemsets = {}  # Store frequent itemsets with their support
    k = 1                   # Current size of itemsets
    active_candidates = set(
        frozenset([item]) for transaction in transactions for item in transaction
    )
    inactive_candidates = set()  # Itemsets found infrequent in earlier passes

    while active_candidates:
        # Calculate support for active candidates
        support_count = calculate_support(transactions, active_candidates)

        # Filter frequent itemsets from active candidates
        current_frequent = {
            itemset: count
            for itemset, count in support_count.items()
            if count >= min_support
        }
        # Add the frequent itemsets to the results
        frequent_itemsets.update(current_frequent)

        # Move non-frequent active candidates to inactive
        inactive_candidates.update(
            itemset for itemset, count in support_count.items() if count < min_support
        )

        # Generate new candidates to activate dynamically
        new_candidates = set()
        for itemset in current_frequent:
            for other in frequent_itemsets.keys():
                if len(itemset.union(other)) == k + 1:
                    new_candidate = itemset.union(other)
                    if all(
                        frozenset(subset) in frequent_itemsets
                        for subset in combinations(new_candidate, k)
                    ):
                        new_candidates.add(new_candidate)

        # Activate new candidates
        active_candidates = new_candidates
        k += 1

    return frequent_itemsets


# Example usage
if __name__ == "__main__":
    # Example transactions
    transactions = [
        {"milk", "bread", "butter"},
        {"beer", "bread", "butter"},
        {"milk", "beer", "bread"},
        {"milk", "bread", "butter", "beer"},
    ]

    # Minimum support threshold
    min_support = 2

    # Run the DIC Algorithm
    frequent_itemsets = dic_algorithm(transactions, min_support)

    # Print the results
    print("Frequent Itemsets:")
    for itemset, support in frequent_itemsets.items():
        print(f"{set(itemset)}: {support}")

OUTPUT :
Frequent Itemsets:
{'butter'}: 3
{'beer'}: 3
{'milk'}: 3
{'bread'}: 4
{'butter', 'bread'}: 3
{'beer', 'butter'}: 2
{'beer', 'bread'}: 3
{'milk', 'butter'}: 2
{'milk', 'beer'}: 2
{'milk', 'bread'}: 3
{'beer', 'butter', 'bread'}: 2
{'milk', 'beer', 'bread'}: 2
{'milk', 'butter', 'bread'}: 2
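
The task also asks for association rules, which none of the listings above derive. The following is a minimal sketch that turns any of the frequent-itemset dictionaries produced above into rules; the minimum confidence of 0.7 is an illustrative choice, and the confidence of a rule X -> Y is computed as support(X union Y) / support(X).

from itertools import combinations


def generate_rules(frequent_itemsets, min_confidence=0.7):
    """Derive rules X -> Y from a dict mapping frozensets to support counts."""
    rules = []
    for itemset, support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue
        for size in range(1, len(itemset)):
            for antecedent in combinations(itemset, size):
                antecedent = frozenset(antecedent)
                consequent = itemset - antecedent
                antecedent_support = frequent_itemsets.get(antecedent)
                if antecedent_support:
                    confidence = support / antecedent_support
                    if confidence >= min_confidence:
                        rules.append((set(antecedent), set(consequent), confidence))
    return rules


for antecedent, consequent, confidence in generate_rules(frequent_itemsets):
    print(f"{antecedent} -> {consequent} (confidence = {confidence:.2f})")

For example, on this dataset the rule {'butter'} -> {'bread'} has confidence 3/3 = 1.0, since every transaction containing butter also contains bread.
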
