6. Develop a program to demonstrate the working of Linear Regression and Polynomial Regression. Use the Boston Housing dataset for Linear Regression and the Auto MPG dataset (vehicle fuel-efficiency prediction) for Polynomial Regression.
PROGRAM:
#install required packages
#pip install numpy pandas matplotlib scikit-learn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
# =====================================================
# PART 1: LINEAR REGRESSION USING BOSTON HOUSING DATASET
# =====================================================
# Fits a LinearRegression model that predicts MEDV from all other columns,
# reports its error on a held-out split, and plots predicted vs. actual.
print("\n================ LINEAR REGRESSION ================")

# Load the dataset through fetch_openml as a single pandas DataFrame.
boston = fetch_openml(name="boston", version=1, as_frame=True)
df_boston = boston.frame
print("\nBoston Dataset Preview:")
print(df_boston.head())

# MEDV is the target and every remaining column is a predictor; both are
# passed through pd.to_numeric before being handed to the model.
X = df_boston.drop(columns="MEDV").apply(pd.to_numeric)
y = pd.to_numeric(df_boston["MEDV"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# (and therefore the reported scores) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred = linear_model.predict(X_test)

# Evaluate on the held-out rows only.
linear_mse = mean_squared_error(y_test, y_pred)
linear_r2 = r2_score(y_test, y_pred)
print("\nLinear Regression Results:")
print("Mean Squared Error:", linear_mse)
print("R2 Score:", linear_r2)

# Actual vs. predicted target values for the test split.
plt.figure(figsize=(8, 5))
plt.scatter(y_test, y_pred)
plt.title("Linear Regression: Actual vs Predicted")
plt.xlabel("Actual House Price")
plt.ylabel("Predicted House Price")
plt.grid(True)
plt.show()
# =====================================================
# PART 2: POLYNOMIAL REGRESSION USING AUTO MPG DATASET
# =====================================================
# Models mpg as a degree-2 polynomial of horsepower, reports the error on a
# held-out split, and draws the fitted curve over all observations.
print("\n================ POLYNOMIAL REGRESSION ================")
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/mpg.csv"
auto_df = pd.read_csv(url)
print("\nAuto MPG Dataset Preview:")
print(auto_df.head())

# Only horsepower and mpg are used below, so drop a row only when one of
# those two is missing; a gap in an unrelated column should not cost a sample.
auto_df = auto_df.dropna(subset=["horsepower", "mpg"])
X = auto_df[["horsepower"]]
y = auto_df["mpg"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# (and therefore the reported scores) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Expand horsepower into polynomial terms. The expansion is fitted on the
# training rows and then re-applied unchanged to the test rows.
degree = 2
poly = PolynomialFeatures(degree=degree)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# A linear model on the expanded features is the polynomial regression.
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
y_poly_pred = poly_model.predict(X_test_poly)

print("\nPolynomial Regression Results:")
print("Mean Squared Error:", mean_squared_error(y_test, y_poly_pred))
print("R2 Score:", r2_score(y_test, y_poly_pred))

# Evenly spaced horsepower values spanning the observed range, used to draw
# a smooth curve. Kept as a DataFrame with the same column name as the
# training data so that poly.transform does not emit a feature-name warning.
X_range = pd.DataFrame(
    np.linspace(
        X["horsepower"].min(),
        X["horsepower"].max(),
        300,
    ),
    columns=["horsepower"],
)
X_range_poly = poly.transform(X_range)
y_range_pred = poly_model.predict(X_range_poly)

plt.figure(figsize=(8, 5))
# Pass 1-D columns to matplotlib rather than single-column DataFrames.
plt.scatter(X["horsepower"], y, label="Actual Data")
# Draw the curve in a contrasting colour so it stands out from the markers.
plt.plot(
    X_range["horsepower"],
    y_range_pred,
    color="red",
    label="Polynomial Regression Curve",
)
plt.xlabel("Horsepower")
plt.ylabel("MPG")
plt.title("Polynomial Regression: Horsepower vs MPG")
plt.legend()
plt.grid(True)
plt.show()
OUTPUT:
================ LINEAR REGRESSION ================
Boston Dataset Preview:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV
0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222.0 18.7 396.90 5.33 36.2
Linear Regression Results:
Mean Squared Error: 24.291119474973538
R2 Score: 0.6687594935356317
================ POLYNOMIAL REGRESSION ================
Auto MPG Dataset Preview:
mpg cylinders displacement horsepower weight acceleration model_year origin name
0 18.0 8 307.0 130.0 3504 12.0 70 usa chevrolet chevelle malibu
1 15.0 8 350.0 165.0 3693 11.5 70 usa buick skylark 320
2 18.0 8 318.0 150.0 3436 11.0 70 usa plymouth satellite
3 16.0 8 304.0 150.0 3433 12.0 70 usa amc rebel sst
4 17.0 8 302.0 140.0 3449 10.5 70 usa ford torino
Polynomial Regression Results:
Mean Squared Error: 18.416967796017616
R2 Score: 0.6391701147013347

