Machine Problem 4

Create a simple Python program for linear regression using a randomly generated data set scattered around a line with a slope of 2 and an intercept of -2. Present the regression model coefficient and intercept, the coefficient of determination (R²), and the root mean squared error (RMSE).

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

%matplotlib notebook	

# Generate noisy samples scattered around the target line y = 2x - 2.
delta = np.random.uniform(-10, 10, size=(20,))  # random scatter added to each sample
x = np.arange(20)
y = 2*x - 2 + delta  # y = m*x + b + delta; m = slope, b = y-intercept
plt.scatter(x, y, c='b', label='random data')  # scatter plot of the noisy samples

# Draw the desired (noise-free) line y = 2x - 2.
# It is kept in its own variable so the noisy samples in `y` are not
# overwritten: the regression below must be fit and scored against the
# random data, not against the perfect line.
y_desired = 2*x - 2
plt.plot(x, y_desired, c='r', linewidth=1, label='desired')  # reference line

# Fit the linear regression to the random data.
# scikit-learn estimators take a 2-D feature matrix (n_samples, n_features),
# so the 1-D x vector is reshaped into a single-feature column.
x = x.reshape(-1, 1)

model = LinearRegression().fit(x, y)

y_predict = model.predict(x)

plt.plot(x, y_predict, 'g', linewidth=3, linestyle=':', dash_capstyle='round', label='output')  # fitted regression line

# Styling the plot.
plt.xticks(np.arange(0, 21, 1))
plt.yticks(np.arange(-10, 50, 10))
plt.legend()  # show the labels assigned to the three plotted series

# Report the fitted parameters and goodness-of-fit against the noisy samples.
print("Linear regression coefficient = ", model.coef_[0])
print("Linear regression intercept = ", model.intercept_)
print("coefficient of determination (R\u00b2) = ", r2_score(y, y_predict))
print("root mean squared error (RMSE) = ", np.sqrt(mean_squared_error(y, y_predict)))