4. Develop a program to load the Iris dataset. Implement the k-Nearest Neighbors (k-NN) algorithm for classifying flowers based on their features. Split the dataset into training and testing sets and evaluate the model using metrics like accuracy and F1-score. Test it for different values of k (e.g., k = 1, 3, 5) and evaluate the accuracy. Extend the k-NN algorithm to assign weights based on the distance of neighbors (e.g., weight = 1/d^2). Compare the performance of weighted k-NN and regular k-NN on a synthetic or real-world dataset.
PROGRAM:
#install required packages
#pip install numpy pandas scikit-learn
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from collections import Counter
# ==============================
# 1. Load Iris Dataset
# ==============================
# Fetch the bundled Iris data and pull out the feature matrix (X) and the
# class labels (y) that the rest of the script works with.
iris = load_iris()
X, y = iris.data, iris.target

# Show the column names and the class names before any modelling starts.
print("Feature Names:", iris.feature_names, sep="\n")
print("\nTarget Names:", iris.target_names, sep="\n")
# ==============================
# 2. Train-Test Split
# ==============================
# Hold out 30% of the samples for testing. The fixed seed makes the split
# reproducible, and stratifying on y keeps the class proportions the same in
# the training and testing partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
# ==============================
# 3. Distance Function
# ==============================
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two feature vectors.

    Args:
        x1: First point, as a NumPy array or any array-like of numbers.
        x2: Second point, with a shape compatible with ``x1``.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    # Cast to float first so plain lists are accepted and unsigned-integer
    # arrays cannot wrap around when subtracted or squared.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
# ==============================
# 4. Regular k-NN
# ==============================
def knn_predict(X_train, y_train, x_test, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Args:
        X_train: Sequence of training feature vectors.
        y_train: Class labels aligned position-for-position with ``X_train``.
        x_test: Feature vector of the sample to classify.
        k: Number of nearest neighbours that vote; must be at least 1.

    Returns:
        The most frequent label among the k nearest training samples. When
        several labels share the top count, the label of the nearest of
        those neighbours wins.

    Raises:
        ValueError: If ``k`` is smaller than 1, the training set is empty,
            or ``X_train`` and ``y_train`` differ in length.
    """
    # Slicing with k <= 0 would otherwise yield an empty or wrong neighbour
    # set, so reject it up front with a clear message.
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(X_train) != len(y_train):
        raise ValueError("X_train and y_train must have the same length")

    distances = [
        (euclidean_distance(sample, x_test), label)
        for sample, label in zip(X_train, y_train)
    ]
    # Sort on distance only: the sort is stable, so equally distant
    # neighbours keep their training order and labels are never compared.
    distances.sort(key=lambda pair: pair[0])

    # Labels are listed nearest-first, and Counter orders equal counts by
    # first appearance, which is what resolves ties toward the nearest one.
    labels = [label for _, label in distances[:k]]
    return Counter(labels).most_common(1)[0][0]
# ==============================
# 5. Weighted k-NN
# weight = 1 / d^2
# ==============================
def weighted_knn_predict(X_train, y_train, x_test, k):
    """Classify one sample by inverse-square-distance weighted voting.

    Each of the k nearest neighbours votes for its own label with weight
    ``1 / d**2``, where ``d`` is its Euclidean distance to ``x_test``; the
    label with the largest total weight is returned.

    Args:
        X_train: Sequence of training feature vectors.
        y_train: Class labels aligned position-for-position with ``X_train``.
        x_test: Feature vector of the sample to classify.
        k: Number of nearest neighbours that vote; must be at least 1.

    Returns:
        The label with the highest accumulated weight. If any of the k
        neighbours coincide with ``x_test``, only those exact matches vote
        (by simple majority). Ties go to the label seen first in
        nearest-first order.

    Raises:
        ValueError: If ``k`` is smaller than 1, the training set is empty,
            or ``X_train`` and ``y_train`` differ in length.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(X_train) != len(y_train):
        raise ValueError("X_train and y_train must have the same length")

    distances = [
        (euclidean_distance(sample, x_test), label)
        for sample, label in zip(X_train, y_train)
    ]
    # Stable sort on distance only, so ties keep their training order.
    distances.sort(key=lambda pair: pair[0])
    squared = [(dist ** 2, label) for dist, label in distances[:k]]

    # 1/d^2 is unbounded as d -> 0. Rather than substituting an arbitrary
    # large constant (which a very close but non-identical neighbour could
    # still out-vote), let exact matches decide the outcome on their own.
    exact_labels = [label for sq_dist, label in squared if sq_dist == 0]
    if exact_labels:
        return Counter(exact_labels).most_common(1)[0][0]

    class_weights = {}
    for sq_dist, label in squared:
        class_weights[label] = class_weights.get(label, 0.0) + 1.0 / sq_dist
    # max() returns the first key holding the maximum, and the dict keeps
    # insertion (nearest-first) order, so ties favour the nearer label.
    return max(class_weights, key=class_weights.get)
# ==============================
# 6. Test for Different k Values
# ==============================
# Neighbourhood sizes to compare; one summary row is collected per value.
k_values = [1, 3, 5]
results = []

for k in k_values:
    # Classify every held-out sample with both voting schemes at this k.
    regular_predictions = [
        knn_predict(X_train, y_train, x_test, k) for x_test in X_test
    ]
    weighted_predictions = [
        weighted_knn_predict(X_train, y_train, x_test, k) for x_test in X_test
    ]

    # average="weighted" combines the per-class F1 scores weighted by how
    # many true samples each class has in the test set.
    regular_accuracy = accuracy_score(y_test, regular_predictions)
    regular_f1 = f1_score(y_test, regular_predictions, average="weighted")
    weighted_accuracy = accuracy_score(y_test, weighted_predictions)
    weighted_f1 = f1_score(y_test, weighted_predictions, average="weighted")

    results.append({
        "k": k,
        "Regular k-NN Accuracy": regular_accuracy,
        "Regular k-NN F1-score": regular_f1,
        "Weighted k-NN Accuracy": weighted_accuracy,
        "Weighted k-NN F1-score": weighted_f1,
    })
# ==============================
# 7. Display Results
# ==============================
# Tabulate one row per k so both classifiers can be compared side by side.
results_df = pd.DataFrame(results)
print(
    "\n--- Comparison of Regular k-NN and Weighted k-NN ---",
    results_df,
    sep="\n",
)
# ==============================
# 8. Detailed Report for Best Example k=5
# ==============================
# Neighbourhood size used for the per-class breakdown below. The report
# headers are built from this value so they cannot drift out of sync with it.
k = 5

regular_predictions = [
    knn_predict(X_train, y_train, x_test, k) for x_test in X_test
]
weighted_predictions = [
    weighted_knn_predict(X_train, y_train, x_test, k) for x_test in X_test
]

print(f"\n--- Classification Report: Regular k-NN, k={k} ---")
print(classification_report(y_test, regular_predictions, target_names=iris.target_names))
print(f"\n--- Classification Report: Weighted k-NN, k={k} ---")
print(classification_report(y_test, weighted_predictions, target_names=iris.target_names))
OUTPUT:
Feature Names:
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target Names:
['setosa' 'versicolor' 'virginica']
--- Comparison of Regular k-NN and Weighted k-NN ---
   k  Regular k-NN Accuracy  Regular k-NN F1-score  Weighted k-NN Accuracy  Weighted k-NN F1-score
0  1               0.933333               0.932660                0.933333                0.932660
1  3               0.955556               0.955357                0.955556                0.955357
2  5               0.977778               0.977753                0.955556                0.955357
--- Classification Report: Regular k-NN, k=5 ---
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.94      1.00      0.97        15
   virginica       1.00      0.93      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45
--- Classification Report: Weighted k-NN, k=5 ---
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.88      1.00      0.94        15
   virginica       1.00      0.87      0.93        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45