CHAPTER 13

12 comments:

  1. EXPLORATORY ANALYSIS
    dataset.dtypes

    Feature engineering
    def unique_counts(data):
        # Print the number of unique values in each column.
        for i in data.columns:
            count = data[i].nunique()
            print(i, ": ", count)

    unique_counts(df_cleaned)
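
    A minimal sketch of a few additional exploratory checks that often accompany dtypes and unique-value counts (assuming df_cleaned is the cleaned DataFrame used above; these specific checks are illustrative, not part of the original notebook):

    # Illustrative exploratory checks (assumes df_cleaned as above).
    print(df_cleaned.shape)           # number of rows and columns
    print(df_cleaned.isnull().sum())  # missing values per column
    print(df_cleaned.describe())      # summary statistics for numeric columns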


  2. DISTRIBUTION HISTOGRAM PLOT

    plt.figure(1 , figsize = (17 , 8))
    n = 0
    # Plot the distribution of each numeric feature in its own subplot.
    for x in ['Age' , 'Annual Income (k$)' , 'Spending Score (1-100)']:
        n += 1
        plt.subplot(1 , 3 , n)
        sns.distplot(data[x] , bins = 20)
        plt.title('Distplot of {}'.format(x))
    plt.show()
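
    Note: sns.distplot is deprecated in recent seaborn releases. A sketch of the same figure using sns.histplot instead (assuming the same data DataFrame and column names):

    # Equivalent figure with the non-deprecated histplot API.
    plt.figure(figsize = (17 , 8))
    for n, col in enumerate(['Age' , 'Annual Income (k$)' , 'Spending Score (1-100)'], start = 1):
        plt.subplot(1 , 3 , n)
        sns.histplot(data[col] , bins = 20, kde = True)
        plt.title('Distribution of {}'.format(col))
    plt.show()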

  3. TRANSFORMING CATEGORICAL VARIABLES
    1.- COUNT HOW MANY UNIQUE VALUES THERE ARE IN EACH VARIABLE
    data.Country.nunique()
    Out: 38
    2.- ASSIGN A NUMBER TO EACH UNIQUE VALUE
    In:
    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    le.fit(data['Country'])
    Out: LabelEncoder()
    l = [i for i in range(38)]
    dict(zip(list(le.classes_), l))
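
    As a quick sanity check (a sketch, assuming le was fitted on data['Country'] as above), transform maps each country to its integer code and inverse_transform recovers the original names:

    # Round-trip check for the fitted LabelEncoder (illustrative).
    codes = le.transform(data['Country'].head())
    print(codes)                        # integer codes for the first few rows
    print(le.inverse_transform(codes))  # original country names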

  4. 3.- SAVE THE CHANGES TO OUR DATAFRAME
    data['Country'] = le.transform(data['Country'])

    # Persist the fitted encoder so the same mapping can be reused later.
    import pickle

    with open('labelencoder.pickle', 'wb') as g:
        pickle.dump(le, g)
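
    A sketch of how the saved encoder could be loaded back later (assuming the labelencoder.pickle file written above; le_loaded is an illustrative name):

    # Reload the encoder and reuse the same Country -> code mapping.
    with open('labelencoder.pickle', 'rb') as g:
        le_loaded = pickle.load(g)
    print(le_loaded.classes_[:5])  # first few known categories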

  5. CLUSTERING
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns

    import os
    # List the files in the input directory (Kaggle-style ../input path).
    print(os.listdir("../input"))

    1.- SELECT THE FEATURES

    # Select the two feature columns used for clustering (chosen by position here).
    x = data.iloc[:, [0,5]].values
    print(x)
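
    Selecting by position depends on the exact column order, so as an alternative, a sketch of the same selection by column name (assuming the DataFrame contains the 'Annual Income (k$)' and 'Spending Score (1-100)' columns used in the plots below):

    # Same feature matrix, selected by column name instead of position (illustrative).
    x = data[['Annual Income (k$)', 'Spending Score (1-100)']].values
    print(x[:5])  # first few rows of the feature matrix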

  6. 2.- FIND THE NUMBER OF CLUSTERS
    from sklearn.cluster import KMeans

    # Compute the within-cluster sum of squares (WCSS) for k = 1..10.
    wcss = []
    for i in range(1,11):
        kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
        kmeans.fit(x)
        wcss.append(kmeans.inertia_)

    plt.plot(range(1, 11), wcss)
    plt.title('The Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('WCSS')
    plt.show()
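
    As a complementary check on the elbow curve (a sketch, not part of the original notebook), the silhouette score can be computed over the same range of k; higher values suggest better-separated clusters:

    # Silhouette score for k = 2..10 (the metric needs at least 2 clusters).
    from sklearn.metrics import silhouette_score

    for k in range(2, 11):
        labels = KMeans(n_clusters = k, init = 'k-means++', random_state = 42).fit_predict(x)
        print(k, silhouette_score(x, labels))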

  7. APPLYING K-MEANS
    1.-
    kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42)
    # Let's predict the cluster for each row of x
    y_kmeans = kmeans.fit_predict(x)
    2.-
    print(y_kmeans)
    # We convert our prediction to a DataFrame so we can easily see it in table form
    df_pred = pd.DataFrame(y_kmeans)
    df_pred.head()
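
    A sketch of attaching the cluster labels back to the original DataFrame to see how many customers fall in each cluster (assuming data and y_kmeans as above; the 'Cluster' column name is illustrative):

    # Add the cluster label as a new column and count members per cluster.
    data['Cluster'] = y_kmeans
    print(data['Cluster'].value_counts().sort_index())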

  8. 3.- PLOTTING THE CLUSTERS
    plt.figure(1 , figsize = (17 , 8))
    plt.scatter(x[y_kmeans == 0, 0], x[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
    plt.scatter(x[y_kmeans == 1, 0], x[y_kmeans == 1, 1], s = 100, c = 'yellow', label = 'Cluster 2')
    plt.scatter(x[y_kmeans == 2, 0], x[y_kmeans == 2, 1], s = 100, c = 'aqua', label = 'Cluster 3')
    plt.scatter(x[y_kmeans == 3, 0], x[y_kmeans == 3, 1], s = 100, c = 'violet', label = 'Cluster 4')
    plt.scatter(x[y_kmeans == 4, 0], x[y_kmeans == 4, 1], s = 100, c = 'lightgreen', label = 'Cluster 5')
    plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'navy', label = 'Centroids')
    plt.title('Clusters of customers')
    plt.xlabel('Annual Income (k$)')
    plt.ylabel('Spending Score (1-100)')
    plt.legend()
    plt.show()
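
    The five scatter calls can also be written as a loop over cluster indices; a sketch (assuming x, y_kmeans and kmeans as above, with an illustrative colour list):

    # Same cluster plot, drawn with a loop instead of five separate scatter calls.
    colors = ['red', 'yellow', 'aqua', 'violet', 'lightgreen']
    plt.figure(figsize = (17 , 8))
    for i in range(5):
        plt.scatter(x[y_kmeans == i, 0], x[y_kmeans == i, 1], s = 100, c = colors[i], label = 'Cluster {}'.format(i + 1))
    plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'navy', label = 'Centroids')
    plt.title('Clusters of customers')
    plt.xlabel('Annual Income (k$)')
    plt.ylabel('Spending Score (1-100)')
    plt.legend()
    plt.show()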

  9. WITH LABELS
    plt.figure(1 , figsize = (17 , 8))
    plt.scatter(x[y_kmeans == 0, 0], x[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Standard people')
    plt.scatter(x[y_kmeans == 1, 0], x[y_kmeans == 1, 1], s = 100, c = 'yellow', label = 'Tightwad people')
    plt.scatter(x[y_kmeans == 2, 0], x[y_kmeans == 2, 1], s = 100, c = 'aqua', label = 'Normal people')
    plt.scatter(x[y_kmeans == 3, 0], x[y_kmeans == 3, 1], s = 100, c = 'violet', label = 'Careless people(TARGET)')
    plt.scatter(x[y_kmeans == 4, 0], x[y_kmeans == 4, 1], s = 100, c = 'lightgreen', label = 'Rich people(TARGET)')
    plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'navy', label = 'Centroids')
    plt.title('Clusters of customers')
    plt.xlabel('Annual Income (k$)')
    plt.ylabel('Spending Score (1-100)')
    plt.legend()
    plt.show()
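
    To back up the descriptive labels above, a sketch of per-cluster averages of the two features (assuming x and y_kmeans as above; the summary column names are illustrative):

    # Mean income and spending score per cluster (illustrative summary table).
    summary = pd.DataFrame(x, columns = ['Annual Income (k$)', 'Spending Score (1-100)'])
    summary['Cluster'] = y_kmeans
    print(summary.groupby('Cluster').mean())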
