# Notebook-wide setup: shared imports used by every cell below.
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
import warnings
# Silence FutureWarning noise (mostly sklearn deprecations) in cell output.
warnings.simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): IPython line magic — only valid inside a Jupyter/IPython
# session; running this file as a plain .py script will fail on this line.
%matplotlib inline
# --- Gaussian Naive Bayes on the iris dataset ---
df = datasets.load_iris()
#df = datasets.load_breast_cancer()
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()
# Fit and predict on the same data: training accuracy only (optimistic).
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: cross_val_score returns fractions in [0, 1]; scale by 100 so the
# printed value matches the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
cv_results = cross_validate(clf, df.data, df.target, cv=5)
# Single-metric scoring; return_train_score also records the train scores.
scores = cross_validate(clf, df.data, df.target, cv=5, scoring='accuracy', return_train_score=True)
print(scores['test_score'])
# Confusion matrix of the (in-sample) predictions.
cm = confusion_matrix(df.target, y_pred)
print(cm)
from sklearn.svm import SVC
from sklearn import svm
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# --- Linear SVM on the breast cancer dataset ---
df = datasets.load_breast_cancer()
clf = SVC(kernel='linear', gamma='auto')
# Fit and predict on the same data: training accuracy only (optimistic).
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: scale the [0, 1] mean accuracy by 100 to match the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
cv_results = cross_validate(clf, df.data, df.target, cv=5)
# roc_auc is a valid metric here because breast cancer is a binary problem.
scores = cross_validate(clf, df.data, df.target, cv=5, scoring=('accuracy', 'roc_auc'), return_train_score=True)
print(scores['test_accuracy'])
print(scores['test_roc_auc'])
cm = confusion_matrix(df.target, y_pred)
print(cm)
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
# Bug fix: scipy.interp was only a deprecated alias of numpy.interp and has
# been removed from modern SciPy releases; import the NumPy original instead.
from numpy import interp
# Load iris
iris = datasets.load_iris()
X = iris.data
y = iris.target
# Binarize the labels into a one-vs-rest indicator matrix (3 classes).
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]
# Seeded RNG; the noise-feature injection is left commented out so the
# problem stays easy.
random_state = np.random.RandomState(0)
#n_samples, n_features = X.shape
#X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# Shuffle and split into train / test halves.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=random_state
)

# One-vs-rest wrapper: fit one linear SVM per class, then score the test set.
base_svm = svm.SVC(kernel='linear', probability=True, random_state=random_state)
classifier = OneVsRestClassifier(base_svm)
classifier.fit(X_train, y_train)
y_score = classifier.decision_function(X_test)
# Per-class ROC curves and AUC scores, keyed by class index.
fpr, tpr, roc_auc = {}, {}, {}
for class_idx in range(n_classes):
    fpr[class_idx], tpr[class_idx], _ = roc_curve(y_test[:, class_idx],
                                                  y_score[:, class_idx])
    roc_auc[class_idx] = auc(fpr[class_idx], tpr[class_idx])

# Micro-averaged ROC/AUC: pool every (label, score) pair across all classes.
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
# Plot the ROC curve for class 2 against the chance diagonal.
plt.figure()
line_width = 2
plt.plot(
    fpr[2],
    tpr[2],
    color='darkorange',
    lw=line_width,
    label='ROC curve (area = %0.2f)' % roc_auc[2],
)
plt.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
#df = datasets.load_iris()
df = datasets.load_breast_cancer()
from sklearn import tree

# --- Decision tree (entropy criterion) on the breast cancer dataset ---
clf = tree.DecisionTreeClassifier(criterion='entropy')
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: scale the [0, 1] mean accuracy by 100 to match the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
from matplotlib.pyplot import figure
figure(dpi=200)
tree_plot = tree.plot_tree(clf, feature_names=df['feature_names'])
# Bug fix: sklearn.externals.six was removed from scikit-learn (>= 0.23);
# StringIO lives in the standard-library io module.
from io import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
# Export the fitted tree to DOT text and render it as a PNG via graphviz.
dot_data = StringIO()
export_graphviz(clf, out_file=dot_data, filled=True, rounded=True, special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# --- k-nearest neighbours (k=3) on the current dataset ---
clf = KNeighborsClassifier(n_neighbors=3)
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: scale the [0, 1] mean accuracy by 100 to match the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
cv_results = cross_validate(clf, df.data, df.target, cv=5)
scores = cross_validate(clf, df.data, df.target, cv=5, scoring='accuracy', return_train_score=True)
print(scores['test_score'])
cm = confusion_matrix(df.target, y_pred)
print(cm)
# Sweep k from 1 to 25 and plot the held-out accuracy for each setting.
X = df.data
y = df.target
# Add random noise features to make the problem harder.
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 2 * n_features)]
# Shuffle and split into train / test sets (80 / 20).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)
k_range = range(1, 26)
scores = {}
scores_list = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    scores[k] = metrics.accuracy_score(y_test, y_pred)
    scores_list.append(scores[k])
plt.plot(k_range, scores_list)
plt.xlabel('Valor de k para kNN')
plt.ylabel('Test accuracy')
from sklearn.ensemble import RandomForestClassifier
# Bug fix: sklearn.externals.six was removed from scikit-learn (>= 0.23);
# StringIO lives in the standard-library io module.
from io import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

# --- Random forest (100 trees) on the current dataset ---
clf = RandomForestClassifier(n_estimators=100)
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: scale the [0, 1] mean accuracy by 100 to match the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
cv_results = cross_validate(clf, df.data, df.target, cv=5)
scores = cross_validate(clf, df.data, df.target, cv=5, scoring='accuracy', return_train_score=True)
print(scores['test_score'])
cm = confusion_matrix(df.target, y_pred)
print(cm)
# Extract tree number 5 from the ensemble and render it with graphviz.
concrete_tree = clf.estimators_[5]
dot_data = StringIO()
export_graphviz(concrete_tree, out_file=dot_data, filled=True, rounded=True, special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())
from sklearn.ensemble import GradientBoostingClassifier
from matplotlib.pyplot import figure

# --- Gradient boosting on the current dataset ---
clf = GradientBoostingClassifier()
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: scale the [0, 1] mean accuracy by 100 to match the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
cv_results = cross_validate(clf, df.data, df.target, cv=5)
scores = cross_validate(clf, df.data, df.target, cv=5, scoring='accuracy', return_train_score=True)
print(scores['test_score'])
cm = confusion_matrix(df.target, y_pred)
print(cm)
from xgboost import XGBClassifier, plot_tree
import matplotlib.pyplot as plt

# --- XGBoost on the current dataset; visualize one boosted tree ---
X = df.data
y = df.target
# Fit with default hyperparameters.
model = XGBClassifier()
model.fit(X, y)
# Horizontal layout of the first tree:
#plot_tree(model, rankdir='LR')
# Draw the fifth tree vertically (num_trees is a 0-based tree index).
plot_tree(model, num_trees=4)
# Blow the figure up so node labels stay legible in the saved image.
fig = plt.gcf()
fig.set_size_inches(150, 100)
fig.savefig('tree.png')
plt.show()
from sklearn.ensemble import AdaBoostClassifier

# --- AdaBoost over decision-tree base learners ---
clf_base = tree.DecisionTreeClassifier()
# NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn
# 1.2 and removed in 1.4. Kept as-is because this file targets an older
# scikit-learn (it still imported sklearn.externals.six) — update the keyword
# if the environment is upgraded.
clf = AdaBoostClassifier(base_estimator=clf_base)
y_pred = clf.fit(df.data, df.target).predict(df.data)
# Bug fix: scale the [0, 1] mean accuracy by 100 to match the '%' suffix.
print(f'Precisión de la validación cruzada: {cross_val_score(clf, df.data, df.target, cv=5).mean() * 100:.2f}%')
cv_results = cross_validate(clf, df.data, df.target, cv=5)
scores = cross_validate(clf, df.data, df.target, cv=5, scoring='accuracy', return_train_score=True)
print(scores['test_score'])
cm = confusion_matrix(df.target, y_pred)
print(cm)