Loading, please wait...

VTU Circulars & Notifications

VTU Exam Circulars & Notifications

VTU Exam Time Table

VTU Academic Calendar

BAIL606 Program 4

4. Develop a program to load the Iris dataset. Implement the k-Nearest Neighbors (k-NN) algorithm for classifying flowers based on their features. Split the dataset into training and testing sets and evaluate the model using metrics like accuracy and F1-score. Test it for different values of k (e.g., k = 1, 3, 5) and evaluate the accuracy. Extend the k-NN algorithm to assign weights based on the distance of neighbors (e.g., weight = 1/d^2, where d is the distance to the neighbor). Compare the performance of weighted k-NN and regular k-NN on a synthetic or real-world dataset.

PROGRAM:

#install required packages
#pip install numpy pandas scikit-learn

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from collections import Counter

# ==============================
# 1. Load Iris Dataset
# ==============================
# Fetch the Iris data; `iris` is kept around because the reports further
# down need its feature and target names.
iris = load_iris()
X, y = iris.data, iris.target

# sep="\n" puts each heading and its value on separate lines.
print("Feature Names:", iris.feature_names, sep="\n")
print("\nTarget Names:", iris.target_names, sep="\n")

# ==============================
# 2. Train-Test Split
# ==============================
# Hold out 30% of the samples for testing.  The fixed random_state makes the
# split reproducible, and stratify=y splits in a stratified fashion using
# the class labels.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# ==============================
# 3. Distance Function
# ==============================
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two feature vectors.

    Both arguments may be any array-like (NumPy arrays, lists, tuples).
    They are converted to float arrays first so that list inputs work and
    unsigned-integer inputs cannot wrap around during the subtraction.
    """
    a = np.asarray(x1, dtype=float)
    b = np.asarray(x2, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))


def _k_nearest(X_train, y_train, x_test, k):
    """Return the k closest training samples as (distance, label) pairs.

    The pairs are ordered nearest first.  The sort is stable, so samples at
    equal distance keep their training-set order.  If k exceeds the number
    of training samples, all of them are returned.

    Raises:
        ValueError: if k < 1, the training set is empty, or X_train and
            y_train differ in length.
    """
    if k < 1:
        # A non-positive k would otherwise yield an empty (k == 0) or
        # silently wrong (k < 0 slices "all but the last") neighbour list.
        raise ValueError(f"k must be at least 1, got {k}")
    if len(X_train) == 0:
        raise ValueError("training set is empty")
    if len(X_train) != len(y_train):
        raise ValueError("X_train and y_train must have the same length")

    distances = [
        (euclidean_distance(sample, x_test), label)
        for sample, label in zip(X_train, y_train)
    ]
    distances.sort(key=lambda pair: pair[0])
    return distances[:k]


# ==============================
# 4. Regular k-NN
# ==============================
def knn_predict(X_train, y_train, x_test, k):
    """Classify x_test by majority vote among its k nearest neighbours.

    Args:
        X_train: training feature vectors, one per row.
        y_train: class label of each training row.
        x_test: the single feature vector to classify.
        k: number of neighbours that vote (must be >= 1).

    Returns:
        The most frequent label among the k nearest neighbours.  When
        several labels share the top count, the one seen first in
        nearest-first order wins.

    Raises:
        ValueError: see _k_nearest.
    """
    neighbors = _k_nearest(X_train, y_train, x_test, k)
    votes = Counter(label for _, label in neighbors)
    return votes.most_common(1)[0][0]


# ==============================
# 5. Weighted k-NN
# weight = 1 / d^2
# ==============================
def weighted_knn_predict(X_train, y_train, x_test, k):
    """Classify x_test with inverse-square-distance weighted voting.

    Each of the k nearest neighbours adds 1 / d^2 to its class score, where
    d is its distance to x_test; the class with the largest score wins.

    If one or more neighbours coincide with x_test (d == 0) their weight
    would be infinite, so those exact matches decide the result on their
    own by majority vote.  A fixed large stand-in weight is deliberately
    avoided: any finite constant can be exceeded by 1 / d^2 of a
    sufficiently close non-identical neighbour.

    Args:
        X_train: training feature vectors, one per row.
        y_train: class label of each training row.
        x_test: the single feature vector to classify.
        k: number of neighbours that vote (must be >= 1).

    Returns:
        The label with the highest accumulated weight.

    Raises:
        ValueError: see _k_nearest.
    """
    neighbors = _k_nearest(X_train, y_train, x_test, k)

    exact_matches = [label for dist, label in neighbors if dist == 0]
    if exact_matches:
        return Counter(exact_matches).most_common(1)[0][0]

    class_weights = {}
    for dist, label in neighbors:
        class_weights[label] = class_weights.get(label, 0.0) + 1.0 / (dist ** 2)

    return max(class_weights, key=class_weights.get)


# ==============================
# 6. Test for Different k Values
# ==============================
k_values = [1, 3, 5]


def _predict_all(predict, k):
    """Classify every sample of X_test with the given single-sample predictor."""
    return [predict(X_train, y_train, x_test, k) for x_test in X_test]


results = []
# k -> (regular predictions, weighted predictions); kept so the detailed
# report below can reuse them instead of classifying the test set again.
predictions_by_k = {}

for k in k_values:
    regular_predictions = _predict_all(knn_predict, k)
    weighted_predictions = _predict_all(weighted_knn_predict, k)
    predictions_by_k[k] = (regular_predictions, weighted_predictions)

    # F1 is averaged per class, weighted by each class's support.
    results.append({
        "k": k,
        "Regular k-NN Accuracy": accuracy_score(y_test, regular_predictions),
        "Regular k-NN F1-score": f1_score(
            y_test, regular_predictions, average="weighted"
        ),
        "Weighted k-NN Accuracy": accuracy_score(y_test, weighted_predictions),
        "Weighted k-NN F1-score": f1_score(
            y_test, weighted_predictions, average="weighted"
        ),
    })

# ==============================
# 7. Display Results
# ==============================
results_df = pd.DataFrame(results)

print("\n--- Comparison of Regular k-NN and Weighted k-NN ---")
print(results_df)

# ==============================
# 8. Detailed Report for Best Example k=5
# ==============================
k = 5

# Only classify again if this k was not part of the sweep above.
if k not in predictions_by_k:
    predictions_by_k[k] = (
        _predict_all(knn_predict, k),
        _predict_all(weighted_knn_predict, k),
    )
regular_predictions, weighted_predictions = predictions_by_k[k]

# The headers are built from k so they cannot drift from the value used.
print(f"\n--- Classification Report: Regular k-NN, k={k} ---")
print(classification_report(y_test, regular_predictions, target_names=iris.target_names))

print(f"\n--- Classification Report: Weighted k-NN, k={k} ---")
print(classification_report(y_test, weighted_predictions, target_names=iris.target_names))

OUTPUT:

Feature Names:
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

Target Names:
['setosa' 'versicolor' 'virginica']

--- Comparison of Regular k-NN and Weighted k-NN ---
   k  Regular k-NN Accuracy  Regular k-NN F1-score  Weighted k-NN Accuracy  Weighted k-NN F1-score
0  1               0.933333               0.932660                0.933333                0.932660
1  3               0.955556               0.955357                0.955556                0.955357
2  5               0.977778               0.977753                0.955556                0.955357

--- Classification Report: Regular k-NN, k=5 ---
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.94      1.00      0.97        15
   virginica       1.00      0.93      0.97        15

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45


--- Classification Report: Weighted k-NN, k=5 ---
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.88      1.00      0.94        15
   virginica       1.00      0.87      0.93        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45
Syllabus Papers
SGPA CGPA