Random Forest
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
# Toy "buys a computer" dataset, written one customer record per row so each
# observation can be read straight across.
_COLUMNS = ('Age', 'Income', 'Student', 'Credit_rating', 'Buys_Computer')
_ROWS = (
    ('Youth', 'High', 'No', 'Fair', 'No'),
    ('Youth', 'High', 'No', 'Excellent', 'No'),
    ('Middle', 'High', 'No', 'Fair', 'Yes'),
    ('Senior', 'Medium', 'No', 'Fair', 'Yes'),
    ('Senior', 'Low', 'Yes', 'Fair', 'Yes'),
    ('Senior', 'Low', 'Yes', 'Excellent', 'No'),
    ('Middle', 'Low', 'Yes', 'Excellent', 'Yes'),
    ('Youth', 'Medium', 'No', 'Fair', 'No'),
    ('Youth', 'Low', 'Yes', 'Excellent', 'Yes'),
    ('Senior', 'Medium', 'Yes', 'Fair', 'Yes'),
)

# Transpose the records back into a column-name -> list-of-values mapping.
data = {
    name: list(values)
    for name, values in zip(_COLUMNS, zip(*_ROWS))
}
df = pd.DataFrame(data)
df
| | Age | Income | Student | Credit_rating | Buys_Computer |
|---|---|---|---|---|---|
| 0 | Youth | High | No | Fair | No |
| 1 | Youth | High | No | Excellent | No |
| 2 | Middle | High | No | Fair | Yes |
| 3 | Senior | Medium | No | Fair | Yes |
| 4 | Senior | Low | Yes | Fair | Yes |
| 5 | Senior | Low | Yes | Excellent | No |
| 6 | Middle | Low | Yes | Excellent | Yes |
| 7 | Youth | Medium | No | Fair | No |
| 8 | Youth | Low | Yes | Excellent | Yes |
| 9 | Senior | Medium | Yes | Fair | Yes |
from sklearn.preprocessing import LabelEncoder

# Replace every column's string categories with integer codes, in place.
# Each column gets its own encoder, and the fitted encoders are kept in
# `encoders` so the codes can be mapped back to the original labels later
# (e.g. encoders['Age'].inverse_transform(...)).
encoders = {}
for col in df.columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
df
| | Age | Income | Student | Credit_rating | Buys_Computer |
|---|---|---|---|---|---|
| 0 | 2 | 0 | 0 | 1 | 0 |
| 1 | 2 | 0 | 0 | 0 | 0 |
| 2 | 0 | 0 | 0 | 1 | 1 |
| 3 | 1 | 2 | 0 | 1 | 1 |
| 4 | 1 | 1 | 1 | 1 | 1 |
| 5 | 1 | 1 | 1 | 0 | 0 |
| 6 | 0 | 1 | 1 | 0 | 1 |
| 7 | 2 | 2 | 0 | 1 | 0 |
| 8 | 2 | 1 | 1 | 0 | 1 |
| 9 | 1 | 2 | 1 | 1 | 1 |
# Separate the target column from the predictor columns.
target_column = 'Buys_Computer'
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build a 100-tree forest and train it (fit() returns the estimator itself).
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Score the held-out rows and report the metrics.
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))
Random Forest Accuracy: 0.3333333333333333
[[1 1]
[1 0]]
precision recall f1-score support
0 0.50 0.50 0.50 2
1 0.00 0.00 0.00 1
accuracy 0.33 3
macro avg 0.25 0.25 0.25 3
weighted avg 0.33 0.33 0.33 3
Statlearner
Statlearner