Jupyter notebook

Download jupyter notebook
No. Code
0 # Libraries
1

    import csv

    import matplotlib.mlab as mlab
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sn
    from matplotlib.pyplot import figure, show
    # scikit-learn names used by the split, model and metric cells below.
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    from sklearn.model_selection import train_test_split

    
2 # Loading Dataframe
3

    dataframe = pd.read_csv('train.csv')

    
4 # Describe dataframe
5

    dataframe.describe()

    
6 # Dataframe
7

    dataframe

    
8 # Preparacion del dataframe
9

    df_x = dataframe[['x']]

    df_y = dataframe['y']

    
10 # Normalizar dataframe
11

    df_nor_x = (df_x - df_x.mean())/df_x.std()

    
12 # x
13

    df_nor_x

    
14 # y
15

    df_nor_y = (df_y - df_y.mean())/df_y.std()

    df_nor_y

    
16 # Dataframe de entrenamiento y de prueba
17

    # Hold out 30% of the rows for testing; random_state=42 fixes the shuffle
    # so the split is reproducible between runs.
    # NOTE(review): df_nor_x and df_nor_y were standardized with the mean/std
    # of the full dataset before this split, so the held-out rows influenced
    # the scaling. Consider splitting first and scaling with training-set
    # statistics only.
    x_train, x_test, y_train, y_test = train_test_split(df_nor_x,df_nor_y,test_size=0.3,random_state=42)

    
18 # Modelo de regresion lineal
19

    model = LinearRegression()

    
20 # Entrenamiento del modelo
21

    model.fit(x_train,y_train)

    
22 # Prueba del modelo
23

    predictions = model.predict(x_test)

    
24 # Evaluacion del modelo
25 # Coefficients
26

    model.coef_

    
27 # Independent term
28

    model.intercept_

    
29 # Mean squared error
30

    mean_squared_error(y_test, predictions)

    
31 # Mean absolute error
32

    mean_absolute_error(y_test, predictions)

    
33 # R^2 score (coefficient of determination)
34

    r2_score(y_test, predictions)

    
35 # Comparacion de los resultados
36

    compare = pd.DataFrame({'Actual':y_test, 'Predicted':predictions})

    
37 # Valores de prueba
38

    compare.Actual.head(10)

    
39 # Valores predichos
40

    compare.Predicted.head(10)

    
41 # Grafica scatter
42

    plt.scatter(y_test,predictions)

    
43 # Grafica de distribucion
44

    # Distribution of the test-set residuals (actual minus predicted).
    # histplot(kde=True) replaces distplot, which seaborn deprecated in v0.11
    # and has scheduled for removal; stat="density" keeps the y-axis on a
    # density scale as distplot did. Requires seaborn >= 0.11.
    sn.histplot(y_test - predictions, kde=True, stat="density")

    
45 # Preparacion del dataframe
46

    df_x = dataframe[['x']]

    df_y = dataframe['y']

    
47 # Normalizar dataframe
48

    df_nor_x = (df_x - df_x.mean())/df_x.std()

    
49 # x
50

    df_nor_x

    
51 # y
52

    df_nor_y = (df_y - df_y.mean())/df_y.std()

    df_nor_y

    
53 # Dataframe de entrenamiento y de prueba
54

    # Hold out 30% of the rows for testing; random_state=42 fixes the shuffle
    # so the split is reproducible between runs.
    # NOTE(review): df_nor_x and df_nor_y were standardized with the mean/std
    # of the full dataset before this split, so the held-out rows influenced
    # the scaling. Consider splitting first and scaling with training-set
    # statistics only.
    x_train, x_test, y_train, y_test = train_test_split(df_nor_x,df_nor_y,test_size=0.3,random_state=42)

    
55 # Modelo de regresion lineal
56

    model = LinearRegression()

    
57 # Entrenamiento del modelo
58

    model.fit(x_train,y_train)

    
59 # Prueba del modelo
60

    predictions = model.predict(x_test)

    
61 # Evaluacion del modelo
62 # Coefficients
63

    model.coef_

    
64 # Independent term
65

    model.intercept_

    
66 # Mean squared error
67

    mean_squared_error(y_test, predictions)

    
68 # Mean absolute error
69

    mean_absolute_error(y_test, predictions)

    
70 # R^2 score (coefficient of determination)
71

    r2_score(y_test, predictions)

    
72 # Comparacion de los resultados
73

    compare = pd.DataFrame({'Actual':y_test, 'Predicted':predictions})

    
74 # Valores de prueba
75

    compare.Actual.head(10)

    
76 # Valores predichos
77

    compare.Predicted.head(10)

    
78 # Grafica scatter
79

    plt.scatter(y_test,predictions)

    
80 # Grafica de distribucion
81

    # Distribution of the test-set residuals (actual minus predicted).
    # histplot(kde=True) replaces distplot, which seaborn deprecated in v0.11
    # and has scheduled for removal; stat="density" keeps the y-axis on a
    # density scale as distplot did. Requires seaborn >= 0.11.
    sn.histplot(y_test - predictions, kde=True, stat="density")

    
82 # Preparacion del dataframe
83

    df_x = dataframe[['x']]

    df_y = dataframe['y']

    
84 # Normalizar dataframe
85

    df_nor_x = (df_x - df_x.mean())/df_x.std()

    
86 # x
87

    df_nor_x

    
88 # y
89

    df_nor_y = (df_y - df_y.mean())/df_y.std()

    df_nor_y

    
90 # Dataframe de entrenamiento y de prueba
91

    # Hold out 30% of the rows for testing; random_state=42 fixes the shuffle
    # so the split is reproducible between runs.
    # NOTE(review): df_nor_x and df_nor_y were standardized with the mean/std
    # of the full dataset before this split, so the held-out rows influenced
    # the scaling. Consider splitting first and scaling with training-set
    # statistics only.
    x_train, x_test, y_train, y_test = train_test_split(df_nor_x,df_nor_y,test_size=0.3,random_state=42)

    
92 # Modelo de regresion lineal
93

    model = LinearRegression()

    
94 # Entrenamiento del modelo
95

    model.fit(x_train,y_train)

    
96 # Prueba del modelo
97

    predictions = model.predict(x_test)

    
98 # Evaluacion del modelo
99 # Coefficients
100

    model.coef_

    
101 # Independent term
102

    model.intercept_

    
103 # Mean squared error
104

    mean_squared_error(y_test, predictions)

    
105 # Mean absolute error
106

    mean_absolute_error(y_test, predictions)

    
107 # R^2 score (coefficient of determination)
108

    r2_score(y_test, predictions)

    
109 # Comparacion de los resultados
110

    compare = pd.DataFrame({'Actual':y_test, 'Predicted':predictions})

    
111 # Valores de prueba
112

    compare.Actual.head(10)

    
113 # Valores predichos
114

    compare.Predicted.head(10)

    
115 # Grafica scatter
116

    plt.scatter(y_test,predictions)

    
117 # Grafica de distribucion
118

    # Distribution of the test-set residuals (actual minus predicted).
    # histplot(kde=True) replaces distplot, which seaborn deprecated in v0.11
    # and has scheduled for removal; stat="density" keeps the y-axis on a
    # density scale as distplot did. Requires seaborn >= 0.11.
    sn.histplot(y_test - predictions, kde=True, stat="density")

    
119 # Preparacion del dataframe
120

    df_x = dataframe[['x']]

    df_y = dataframe['y']

    
121 # Normalizar dataframe
122

    df_nor_x = (df_x - df_x.mean())/df_x.std()

    
123 # x
124

    df_nor_x

    
125 # y
126

    df_nor_y = (df_y - df_y.mean())/df_y.std()

    df_nor_y

    
127 # Dataframe de entrenamiento y de prueba
128

    # Hold out 30% of the rows for testing; random_state=42 fixes the shuffle
    # so the split is reproducible between runs.
    # NOTE(review): df_nor_x and df_nor_y were standardized with the mean/std
    # of the full dataset before this split, so the held-out rows influenced
    # the scaling. Consider splitting first and scaling with training-set
    # statistics only.
    x_train, x_test, y_train, y_test = train_test_split(df_nor_x,df_nor_y,test_size=0.3,random_state=42)

    
129 # Modelo de regresion lineal
130

    model = LinearRegression()

    
131 # Entrenamiento del modelo
132

    model.fit(x_train,y_train)

    
133 # Prueba del modelo
134

    predictions = model.predict(x_test)

    
135 # Evaluacion del modelo
136 # Coefficients
137

    model.coef_

    
138 # Independent term
139

    model.intercept_

    
140 # Mean squared error
141

    mean_squared_error(y_test, predictions)

    
142 # Mean absolute error
143

    mean_absolute_error(y_test, predictions)

    
144 # R^2 score (coefficient of determination)
145

    r2_score(y_test, predictions)

    
146 # Comparacion de los resultados
147

    compare = pd.DataFrame({'Actual':y_test, 'Predicted':predictions})

    
148 # Valores de prueba
149

    compare.Actual.head(10)

    
150 # Valores predichos
151

    compare.Predicted.head(10)

    
152 # Grafica scatter
153

    plt.scatter(y_test,predictions)

    
154 # Grafica de distribucion
155

    # Distribution of the test-set residuals (actual minus predicted).
    # histplot(kde=True) replaces distplot, which seaborn deprecated in v0.11
    # and has scheduled for removal; stat="density" keeps the y-axis on a
    # density scale as distplot did. Requires seaborn >= 0.11.
    sn.histplot(y_test - predictions, kde=True, stat="density")

    
156 # Describe
157

    dataframe.describe()

    
158 # Heatmap nulls
159

    sn.heatmap(dataframe.isnull())