November 30, 2023

ML

Slip_1

Write a python program to transform data with Principal Component Analysis (PCA).
Use the handwritten digit dataset.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits

# Load the handwritten digit dataset (1797 samples, 64 features each)
digits = load_digits()
X = digits.data
y = digits.target

# Centre the data, then compute the 64x64 covariance matrix
X_centered = X - X.mean(axis=0)
Sigma = np.cov(X_centered.T)

# Eigendecomposition of the symmetric covariance matrix,
# sorted by descending eigenvalue
eigvals, eigvecs = np.linalg.eigh(Sigma)
idx = eigvals.argsort()[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

# Project the centred data onto the top two principal components
P = eigvecs[:, :2]
X_pca = X_centered.dot(P)

plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='tab10', alpha=0.5)
plt.colorbar(label='Digit')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA of Handwritten Digits')
plt.show()
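
A quick cross-check (an optional extra, not part of the slip): sklearn's PCA should reproduce the same projection up to the arbitrary sign of each component.

# Optional sanity check against sklearn's PCA; columns may differ only in sign
from sklearn.decomposition import PCA
X_sklearn = PCA(n_components=2).fit_transform(X)
print(np.allclose(np.abs(X_pca), np.abs(X_sklearn)))  # expect True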
-------------------------------------------------------------------------------------------------

Slip_2

Write a python program to implement simple Linear Regression for predicting house 
price.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

data = pd.read_csv('/home/cslogin/msc26/15626/csv/Housing.csv')
X = data['area'].values.reshape(-1, 1)
y = data['price'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
slope = model.coef_[0]
intercept = model.intercept_
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Linear Regression Equation: price = {slope:.2f} * area + {intercept:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")
plt.scatter(X_test, y_test, color='blue', label='Actual Data')
plt.plot(X_test, y_pred, color='red', linewidth=2, label='Regression Line')
plt.xlabel('Area (sqft)')
plt.ylabel('Price ($)')
plt.title('House Price Prediction')
plt.legend()
plt.show()
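
Once fitted, the same model can price a new listing; the 3000 sq ft input below is purely illustrative.

# Illustrative prediction for a hypothetical 3000 sq ft house
new_area = [[3000]]
print(f"Predicted price for 3000 sqft: {model.predict(new_area)[0]:.2f}")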

---------------------------------------------------------------------------------------------------------------

Slip_3

Write a python program to implement multiple Linear Regression for predicting
house price.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
data = pd.read_csv('Housing.csv')
X = data[['area', 'bedrooms', 'bathrooms']].values
y = data['price'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Multiple Linear Regression Equation: price = {model.coef_[0]:.2f} * area + {model.coef_[1]:.2f} * bedrooms + {model.coef_[2]:.2f} * bathrooms + {model.intercept_:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")
plt.scatter(X_test[:, 0], y_test, color='blue', label='Actual')
plt.scatter(X_test[:, 0], y_pred, color='red', label='Predicted')
plt.xlabel('Area (sqft)')
plt.ylabel('Price ($)')
plt.title('House Price Prediction')
plt.legend()
plt.show()

--------------------------------------------------------------------------------------------------------------

Slip_4

Write a python program to implement logistic Regression for predicting
whether a person will buy insurance or not. Use insurance_data.csv

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.read_csv("/home/cslogin/msc26/15626/csv/insurance_data.csv")
X = data[['age']]
y = data['bought_insurance']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Not Bought', 'Bought'], yticklabels=['Not Bought', 'Bought'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
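
For a single applicant, the fitted model also gives class probabilities; the age of 47 below is only an example value.

# Probability of buying insurance for a hypothetical 47-year-old
sample = pd.DataFrame({'age': [47]})
print(model.predict(sample))        # predicted class (0 = not bought, 1 = bought)
print(model.predict_proba(sample))  # [P(not bought), P(bought)]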

---------------------------------------------------------------------------------------------------------

Slip_5

Write a python program to implement logistic Regression for handwritten digit 
dataset

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import matplotlib.pyplot as plt
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
plt.figure(figsize=(8, 8))
for i in range(15):
    plt.subplot(3, 5, i + 1)
    plt.imshow(X_test[i].reshape(8, 8), cmap='gray')
    plt.title(f"Predicted: {predictions[i]}")
    plt.axis('off')
plt.tight_layout()
plt.show()
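
Beyond overall accuracy, a confusion matrix shows which digits get confused with which; this reuses the metrics module imported above.

# Per-digit breakdown of the test predictions
print(metrics.confusion_matrix(y_test, predictions))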

-------------------------------------------------------------------------------------------------------------

Slip_6

Write a python program to implement Polynomial Regression for the position_sal.csv
dataset.

import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
data = pd.read_csv('position_sal.csv')
X = data[['Level']]
y = data['Salary']
poly_features = PolynomialFeatures(degree=4)  # You can adjust the degree as needed
X_poly = poly_features.fit_transform(X)
model = LinearRegression()
model.fit(X_poly, y)
y_pred = model.predict(X_poly)
plt.scatter(X, y, color='blue', label='Actual Data')
plt.plot(X, y_pred, color='red', label='Polynomial Regression')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.title('Polynomial Regression')
plt.legend()
plt.show()



------------------------------------------------------------------------------------------------------------

Slip_7

Write a python program to implement Decision Tree Model for classification. 
Use Decision_Tree_Dataset.csv

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
data = pd.read_csv('Decision_Tree_Dataset.csv')
X = data.drop('Target', axis=1)
y = data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, predictions))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, predictions))
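
The fitted tree itself can be drawn with sklearn's plot_tree; this optional extra assumes matplotlib, which the slip above does not import.

# Optional: visualize the fitted decision tree
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=list(X.columns), filled=True)
plt.show()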
-----------------------------------------------------------------------------------------------------------

Slip_8

Write a python program to implement linear SVM for Regression. Use 
position_sal.csv.

import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
data = pd.read_csv('/content/position_sal.csv')
X = data.iloc[:, 1:2].values  # Assuming the independent variable is in the second column
y = data.iloc[:, -1].values   # Assuming the dependent variable (target) is in the last column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
y_train = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()
svm_regressor = SVR(kernel='linear')
svm_regressor.fit(X_train, y_train)
y_pred = svm_regressor.predict(X_test)
y_pred = y_pred.reshape(-1, 1)  # Reshape predictions to match the expected shape for inverse_transform
y_pred = sc_y.inverse_transform(y_pred)  # Inverse transform predictions to original scale
rmse = mean_squared_error(y_test, y_pred, squared=False)
print("Root Mean Squared Error (RMSE):", rmse)
X_plot = sc_X.inverse_transform(X_test)  # back to original position levels for plotting
plt.scatter(X_plot, y_test, color='blue', label='Actual')
plt.scatter(X_plot, y_pred, color='red', label='Predicted')
plt.title('Linear SVM Regression')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.legend()
plt.show()

------------------------------------------------------------------------------------------------------------

Slip_9

Write a python program to implement linear SVM for Classification. Use 
iris.csv.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
data = pd.read_csv('/content/Iris.csv')
X = data.iloc[:, :-1]  # features: every column except the last (drop an Id column first if present)
y = data.iloc[:, -1]   # target: the species label in the last column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
svm_classifier = LinearSVC()
svm_classifier.fit(X_train_scaled, y_train)
predictions = svm_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)
print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

------------------------------------------------------------------------------------------------------------

Slip_10

Write a python program to implement k-nearest Neighbors algorithm to build a
prediction model. Use Iris Dataset. 

import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
iris.feature_names
iris.target_names
df = pd.DataFrame(iris.data,columns=iris.feature_names)
df.head()
df['target'] = iris.target
df.head()
df[df.target==1].head()
df[df.target==2].head()
df['flower_name'] =df.target.apply(lambda x: iris.target_names[x])
df.head()
df[45:55]
df0 = df[:50]
df1 = df[50:100]
df2 = df[100:]
import matplotlib.pyplot as plt
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.scatter(df0['sepal length (cm)'], df0['sepal width (cm)'], color="green", marker='+')
plt.scatter(df1['sepal length (cm)'], df1['sepal width (cm)'], color="blue", marker='.')
plt.show()
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.scatter(df0['petal length (cm)'], df0['petal width (cm)'], color="green", marker='+')
plt.scatter(df1['petal length (cm)'], df1['petal width (cm)'], color="blue", marker='.')
plt.show()
from sklearn.model_selection import train_test_split
X = df.drop(['target','flower_name'], axis='columns')
y = df.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
len(X_train)
len(X_test)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train,y_train)
print(knn.score(X_test, y_test))
print(knn.predict([[4.8,3.0,1.5,0.3]]))
from sklearn.metrics import confusion_matrix
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
import matplotlib.pyplot as plt
import seaborn as sn
plt.figure(figsize=(7,5))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
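
The choice n_neighbors=10 is one option; a quick sweep over k on the same split (an optional extra) shows how sensitive the test score is.

# Sketch: test accuracy for a range of k values
for k in range(1, 16):
    acc = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).score(X_test, y_test)
    print(k, acc)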

-------------------------------------------------------------------------------------------------------------

Slip_11

Write a python program to Implement Naïve Bayes for classification. Use 
titanic.csv/spam.csv dataset.

import pandas as pd
df = pd.read_csv("./titanic.csv")
df.head()
df.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis='columns',inplace=True)
df.head()
inputs = df.drop('Survived',axis='columns')
target = df.Survived
dummies = pd.get_dummies(inputs.Sex)
print(dummies.head(3))
inputs = pd.concat([inputs,dummies],axis='columns')
inputs.head(3)
inputs.drop(['Sex','male'],axis='columns',inplace=True)
inputs.head(3)
inputs.columns[inputs.isna().any()]
inputs.Age[:10]
inputs.Age = inputs.Age.fillna(inputs.Age.mean())
inputs.head()
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.3)
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train,y_train)
print(model.score(X_test, y_test))
print(X_test[0:10])
print(y_test[0:10])
print(model.predict(X_test[0:10]))
print(model.predict_proba(X_test[:10]))
from sklearn.model_selection import cross_val_score
print(cross_val_score(GaussianNB(),X_train, y_train, cv=5))

-------------------------------------------------------------------------------------------------------------

Slip_12

Write a python program to implement k-means algorithm. Use income.csv 
dataset.

import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
data = pd.read_csv('/content/income.csv')
features = data[['Income($)', 'Age']]
k = 3
kmeans = KMeans(n_clusters=k)
kmeans.fit(features)
labels = kmeans.labels_
data['Cluster'] = labels
print(data)
plt.scatter(data['Income($)'], data['Age'], c=labels, cmap='rainbow')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='red', marker='X', label='Centroids')
plt.title('K-Means Clustering')
plt.xlabel('Income')
plt.ylabel('Age')
plt.legend()
plt.show()
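
k=3 is fixed above; a common way to justify the choice is the elbow method, sketched below on the same two features (an optional extra).

# Sketch: elbow method - plot inertia for k = 1..9 and look for the "elbow"
sse = []
for k in range(1, 10):
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(features)
    sse.append(km.inertia_)
plt.plot(range(1, 10), sse, marker='o')
plt.xlabel('k')
plt.ylabel('Sum of squared distances (inertia)')
plt.title('Elbow Method')
plt.show()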
   
-------------------------------------------------------------------------------------------------------------

Slip_13

Write a python program to implement Agglomerative clustering on a income.csv 
dataset.

import pandas as pd
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
data = pd.read_csv('/content/income.csv')
features = data[['Income($)', 'Age']]
n_clusters = 3
agg_cluster = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
agg_cluster.fit(features)
data['Cluster'] = agg_cluster.labels_
print(data)
linked = linkage(features, 'ward')
dendrogram(linked, orientation='top', distance_sort='descending', show_leaf_counts=True)
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('Sample Index')
plt.ylabel('Cluster Distance')
plt.show()
 


-------------------------------------------------------------------------------------------------------------

Slip_14

Write a python program to implement k-means algorithm on a synthetic dataset

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
n_samples = 300
n_features = 2
n_clusters = 3
X, _ = make_blobs(n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=42)
kmeans = KMeans(n_clusters=n_clusters)
kmeans.fit(X)
cluster_centers = kmeans.cluster_centers_
labels = kmeans.labels_
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
plt.scatter(cluster_centers[:, 0], cluster_centers[:, 1], marker='x', c='red', s=100, label='Cluster Centers')
plt.legend()
plt.title('K-Means Clustering')
plt.show()
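
Because the data here is synthetic, the clustering is easy to score as well; the silhouette score (an optional extra, not in the slip) ranges from -1 to 1, higher being better.

# Optional: silhouette score for the k-means labels
from sklearn.metrics import silhouette_score
print("Silhouette score:", silhouette_score(X, labels))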

---------------------------------------------------------------------------------------------------------------

Slip_15

Write a python program to Implement Naïve Bayes for classification. Use 
titanic.csv/spam.csv dataset.

import pandas as pd
df = pd.read_csv("./titanic.csv")
df.head()
df.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis='columns',inplace=True)
df.head()
inputs = df.drop('Survived',axis='columns')
target = df.Survived
dummies = pd.get_dummies(inputs.Sex)
print(dummies.head(3))
inputs = pd.concat([inputs,dummies],axis='columns')
inputs.head(3)
inputs.drop(['Sex','male'],axis='columns',inplace=True)
inputs.head(3)
inputs.columns[inputs.isna().any()]
inputs.Age[:10]
inputs.Age = inputs.Age.fillna(inputs.Age.mean())
inputs.head()
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(inputs,target,test_size=0.3)
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train,y_train)
print(model.score(X_test, y_test))
print(X_test[0:10])
print(y_test[0:10])
print(model.predict(X_test[0:10]))
print(model.predict_proba(X_test[:10]))
from sklearn.model_selection import cross_val_score
print(cross_val_score(GaussianNB(),X_train, y_train, cv=5))

-------------------------------------------------------------------------------------------------------------

Slip_16

Write a python program to implement k-nearest Neighbors algorithm to build a
prediction model. Use Iris Dataset. 

import pandas as pd
from sklearn.datasets import load_iris
iris = load_iris()
iris.feature_names
iris.target_names
df = pd.DataFrame(iris.data,columns=iris.feature_names)
df.head()
df['target'] = iris.target
df.head()
df[df.target==1].head()
df[df.target==2].head()
df['flower_name'] =df.target.apply(lambda x: iris.target_names[x])
df.head()
df[45:55]
df0 = df[:50]
df1 = df[50:100]
df2 = df[100:]
import matplotlib.pyplot as plt
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.scatter(df0['sepal length (cm)'], df0['sepal width (cm)'], color="green", marker='+')
plt.scatter(df1['sepal length (cm)'], df1['sepal width (cm)'], color="blue", marker='.')
plt.show()
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.scatter(df0['petal length (cm)'], df0['petal width (cm)'], color="green", marker='+')
plt.scatter(df1['petal length (cm)'], df1['petal width (cm)'], color="blue", marker='.')
plt.show()
from sklearn.model_selection import train_test_split
X = df.drop(['target','flower_name'], axis='columns')
y = df.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
len(X_train)
len(X_test)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train,y_train)
print(knn.score(X_test, y_test))
print(knn.predict([[4.8,3.0,1.5,0.3]]))
from sklearn.metrics import confusion_matrix
y_pred = knn.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
import matplotlib.pyplot as plt
import seaborn as sn
plt.figure(figsize=(7,5))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.show()
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

--------------------------------------------------------------------------------------------------------------

Slip_17

Write a python program to implement linear SVM for Classification. Use 
iris.csv.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
data = pd.read_csv('/content/Iris.csv')
X = data.iloc[:, :-1]  # features: every column except the last (drop an Id column first if present)
y = data.iloc[:, -1]   # target: the species label in the last column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
svm_classifier = LinearSVC()
svm_classifier.fit(X_train_scaled, y_train)
predictions = svm_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)
print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

--------------------------------------------------------------------------------------------------------------

Slip_18

Write a python program to implement linear SVM for Regression. Use 
position_sal.csv.

import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
data = pd.read_csv('/content/position_sal.csv')
X = data.iloc[:, 1:2].values  # Assuming the independent variable is in the second column
y = data.iloc[:, -1].values   # Assuming the dependent variable (target) is in the last column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
y_train = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()
svm_regressor = SVR(kernel='linear')
svm_regressor.fit(X_train, y_train)
y_pred = svm_regressor.predict(X_test)
y_pred = y_pred.reshape(-1, 1)  # Reshape predictions to match the expected shape for inverse_transform
y_pred = sc_y.inverse_transform(y_pred)  # Inverse transform predictions to original scale
rmse = mean_squared_error(y_test, y_pred, squared=False)
print("Root Mean Squared Error (RMSE):", rmse)
X_plot = sc_X.inverse_transform(X_test)  # back to original position levels for plotting
plt.scatter(X_plot, y_test, color='blue', label='Actual')
plt.scatter(X_plot, y_pred, color='red', label='Predicted')
plt.title('Linear SVM Regression')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.legend()
plt.show()
---------------------------------------------------------------------------------------------------------------

Slip_19

Write a python program to implement Decision Tree Model for classification. 
Use Decision_Tree_Dataset.csv

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
data = pd.read_csv('Decision_Tree_Dataset.csv')
X = data.drop('Target', axis=1)
y = data['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, predictions))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, predictions))

--------------------------------------------------------------------------------------------------------------

Slip_20

Write a python program to implement Polynomial Regression for given dataset.
Use position_sal.csv

import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
data = pd.read_csv('/home/cslogin/msc26/15626/csv/position_sal.csv')
X = data[['Level']]
y = data['Salary']
poly_features = PolynomialFeatures(degree=4)  # You can adjust the degree as needed
X_poly = poly_features.fit_transform(X)
model = LinearRegression()
model.fit(X_poly, y)
y_pred = model.predict(X_poly)
plt.scatter(X, y, color='blue', label='Actual Data')
plt.plot(X, y_pred, color='red', label='Polynomial Regression')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.title('Polynomial Regression')
plt.legend()
plt.show()

-----------------------------------------------------------------------------------------------------------

Slip_21

Write a python program to implement logistic Regression for handwritten digit 
dataset

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import matplotlib.pyplot as plt
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.2, random_state=42)
model = LogisticRegression(max_iter=10000)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
plt.figure(figsize=(8, 8))
for i in range(15):
    plt.subplot(3, 5, i + 1)
    plt.imshow(X_test[i].reshape(8, 8), cmap='gray')
    plt.title(f"Predicted: {predictions[i]}")
    plt.axis('off')
plt.tight_layout()
plt.show()

-------------------------------------------------------------------------------------------------------------

Slip_22

Write a python program to implement logistic Regression for predicting
whether a person will buy insurance or not. Use insurance_data.csv

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.read_csv("/home/cslogin/msc26/15626/csv/insurance_data.csv")
X = data[['age']]
y = data['bought_insurance']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Not Bought', 'Bought'], yticklabels=['Not Bought', 'Bought'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

----------------------------------------------------------------------------------------------------------------

Slip_23

Write a python program to implement multiple Linear Regression for predicting
house price.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
data = pd.read_csv('Housing.csv')
X = data[['area', 'bedrooms', 'bathrooms']].values
y = data['price'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Multiple Linear Regression Equation: price = {model.coef_[0]:.2f} * area + {model.coef_[1]:.2f} * bedrooms + {model.coef_[2]:.2f} * bathrooms + {model.intercept_:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")
plt.scatter(X_test[:, 0], y_test, color='blue', label='Actual')
plt.scatter(X_test[:, 0], y_pred, color='red', label='Predicted')
plt.xlabel('Area (sqft)')
plt.ylabel('Price ($)')
plt.title('House Price Prediction')
plt.legend()
plt.show()

---------------------------------------------------------------------------------------------------------------

Slip_24

Write a python program to implement simple Linear Regression for predicting house 
price.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

data = pd.read_csv('/home/cslogin/msc26/15626/csv/Housing.csv')
X = data['area'].values.reshape(-1, 1)
y = data['price'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
slope = model.coef_[0]
intercept = model.intercept_
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Linear Regression Equation: price = {slope:.2f} * area + {intercept:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")
plt.scatter(X_test, y_test, color='blue', label='Actual Data')
plt.plot(X_test, y_pred, color='red', linewidth=2, label='Regression Line')
plt.xlabel('Area (sqft)')
plt.ylabel('Price ($)')
plt.title('House Price Prediction')
plt.legend()
plt.show()

---------------------------------------------------------------------------------------------------------------

Slip_25

Write a python program to transform data with Principal Component Analysis 
(PCA). Consider handwritten digit dataset.

from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
digits = load_digits()
data = digits.data
targets = digits.target
pca = PCA(n_components=2)  # You can change the number of components as needed
transformed_data = pca.fit_transform(data)
plt.figure(figsize=(8, 6))
for i in range(10):
    plt.scatter(transformed_data[targets == i, 0], transformed_data[targets == i, 1], label=str(i))
plt.title('PCA of Handwritten Digits Dataset')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.show()
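
How much of the total variance the two retained components capture can be read off the fitted object.

# Fraction of variance captured by each retained component
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance retained:", pca.explained_variance_ratio_.sum())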

------------------------------------------------------------------------------------------------------------