Reading the data file from your folder
import os

import pandas as pd

# Current working directory
print(os.getcwd())

# Folder that holds the data file. The variable is called `data_dir` instead
# of `str` so the built-in `str` type is not shadowed for the rest of the script.
data_dir = "/Users/mdfazlulkarimpatwary/Documents/Lectures/Machine Learning for Remot sencing Data/IRS Course"

# Switch into the data folder so the relative file name below resolves there.
os.chdir(data_dir)

# Load the workbook into a DataFrame.
df = pd.read_excel("breast_cancer_full_dataset.xlsx")
Splitting the dataset
# Features: every column except the two target columns.
X = df.drop(columns=["target", "target_label"])
# Labels: the "target" column.
y = df["target"]
Standardize all the X variables
from sklearn.preprocessing import StandardScaler

# Learn the per-column scaling statistics from X, then replace X with its
# standardized version (the result is a NumPy array, not a DataFrame).
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test rows influence the scaling statistics. Consider
# fitting on the training rows only (e.g. via a Pipeline).
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
Creating the train and test datasets
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
Creating SVM Model
from sklearn.svm import SVC

# RBF-kernel support vector classifier.
# probability=True enables model.predict_proba, which the ROC section of this
# script calls. The probability calibration uses a random number generator,
# so random_state is fixed (same seed as the train/test split) to make the
# predicted probabilities, and therefore the ROC curve and AUC, reproducible.
model = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    probability=True,
    random_state=42,
)
model.fit(X_train, y_train)
Prediction for the test data using the model
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)

# Predicted class labels for the held-out test rows.
y_pred = model.predict(X_test)

# Report the confusion matrix first, then the overall accuracy and the
# classification report, all computed against the true test labels.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)
Cross-validation score
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the SVM on the full feature matrix X and labels y;
# `scores` holds one score per fold.
scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())
Creating the learning curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import learning_curve

# Score the model at ten training-set fractions from 10% to 100%, using
# 5-fold cross-validation at each size. Rows are shuffled with a fixed seed
# before the training subsets are taken.
train_sizes, train_scores, test_scores = learning_curve(
    model,
    X,
    y,
    train_sizes=np.linspace(0.1, 1.0, 10),
    cv=5,
    scoring='accuracy',
    shuffle=True,
    random_state=42,
)

# Collapse the per-fold scores (axis 1) into a mean and a standard deviation
# for each training-set size.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)
Plot accuracy values
plt.figure(figsize=(5, 3))

# Draw each curve as a mean line with a shaded +/- one standard deviation band.
# Training is drawn first, then validation.
for curve_mean, curve_std, curve_label in (
    (train_mean, train_std, "Training Accuracy"),
    (test_mean, test_std, "Validation Accuracy"),
):
    plt.plot(train_sizes, curve_mean, label=curve_label)
    plt.fill_between(
        train_sizes,
        curve_mean - curve_std,
        curve_mean + curve_std,
        alpha=0.2,
    )

# Labels, legend and layout.
plt.title("Learning Curve (SVM)")
plt.xlabel("Training Set Size")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
ROC Curve
from sklearn.metrics import auc, roc_curve

# Use the second predict_proba column as the score for the ROC curve.
# NOTE(review): this assumes the second column corresponds to the positive
# class (label 1) - confirm against model.classes_.
y_scores = model.predict_proba(X_test)[:, 1]

# ROC points over all score thresholds, and the area under that curve.
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(
    fpr,
    tpr,
    color="blue",
    linewidth=2,
    label=f"ROC curve (AUC = {roc_auc:.2f})",
)
# Diagonal reference line: the expected curve for random guessing.
plt.plot([0, 1], [0, 1], color="red", linestyle="--")

# A little headroom above 1.0 on the y-axis keeps the top of the curve visible.
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.05)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for SVM")
plt.legend(loc="lower right")
plt.grid(True)
plt.show()
No More
Statlearner
Statlearner