sklearn linear regression example

sklearn linear regression example 

Linear regression is a fundamental technique in data science and machine learning used to predict a continuous target variable based on one or more input features. Scikit-learn, a powerful Python library, simplifies the implementation of linear regression models. 

In this article, we'll walk you through practical examples of simple and multiple linear regression using scikit-learn, complete with hypothetical datasets and clear code snippets.

Example 1: Simple Linear Regression with Additional Details

Question:
Given a dataset containing the temperature in degrees Celsius and the corresponding number of ice creams sold, build a simple linear regression model to predict the number of ice creams sold based on the temperature.

Hypothetical Data:

temperature       ice_creams_sold
20                50
25                60
30                80
35                100
40                120

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Hypothetical observations: temperature (°C) paired with ice creams sold
data = {
    'temperature': [20, 25, 30, 35, 40],
    'ice_creams_sold': [50, 60, 80, 100, 120],
}
df = pd.DataFrame(data)

# The feature matrix is kept two-dimensional (a one-column DataFrame);
# the target is a plain Series
X, y = df[['temperature']], df['ice_creams_sold']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the linear model on the training rows (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Evaluate on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Predict sales for a temperature that is not in the dataset; the input is a
# DataFrame with the same column name the model was fitted on
new_temperature = pd.DataFrame({'temperature': [32]})
predicted_ice_creams = model.predict(new_temperature)
print(f"Predicted number of ice creams sold at 32°C: {predicted_ice_creams[0]}")






Example 2: Employee Performance Prediction


import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Hypothetical employee records; each key becomes one DataFrame column
data = {
    'Experience (years)': [5, 8, 10, 3, 6],
    'Education Level (1-5)': [3, 4, 5, 2, 3],
    'Training Hours': [20, 30, 40, 15, 25],
    'Performance (1-10)': [8, 9, 9, 7, 8],
}
df = pd.DataFrame(data)

# Three predictor columns and one target column
feature_columns = ['Experience (years)', 'Education Level (1-5)', 'Training Hours']
target_column = 'Performance (1-10)'
X = df[feature_columns]
y = df[target_column]

# Fit the multiple linear regression (fit() returns the estimator itself)
model = LinearRegression().fit(X, y)

# One coefficient per feature, in the order of feature_columns, plus the intercept
coefficients, intercept = model.coef_, model.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)



Example 3: Energy Consumption Prediction for a Building


Suppose we want to predict energy consumption of a building based on factors like outdoor temperature, number of occupants, and building size.

import pandas as pd
from sklearn.linear_model import LinearRegression

import numpy as np  # only needed for the rank check below

# Define the data
# NOTE: this toy dataset is degenerate. It has only 3 rows for a model with
# 4 parameters (3 coefficients + intercept), and the features are exact linear
# functions of each other: occupants = 2 * temperature + 10 and
# size = 10 * occupants. The fit still succeeds and reproduces the targets,
# but the individual coefficients are NOT uniquely determined, so they must
# not be interpreted as the separate effect of each factor.
data = {
    'Outdoor Temperature (°C)': [20, 25, 30],
    'Number of Occupants': [50, 60, 70],
    'Building Size (sqm)': [500, 600, 700],
    'Energy Consumption (kWh)': [1000, 1200, 1400]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Define the independent variables (features) and dependent variable
X = df[['Outdoor Temperature (°C)', 'Number of Occupants', 'Building Size (sqm)']]
y = df['Energy Consumption (kWh)']

# Check whether the data can identify every parameter: the design matrix
# (a column of ones for the intercept plus the features) needs rank equal to
# the number of parameters for the least-squares solution to be unique.
design_matrix = np.column_stack([np.ones(len(X)), X.to_numpy(dtype=float)])
rank = np.linalg.matrix_rank(design_matrix)
n_params = design_matrix.shape[1]
if rank < n_params:
    print(
        f"Warning: design matrix has rank {rank} but the model has "
        f"{n_params} parameters; the coefficients are not uniquely determined."
    )

# Create and fit the model
model = LinearRegression()
model.fit(X, y)

# Get the model coefficients
coefficients = model.coef_
intercept = model.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)




Example 4: Predict a Company's Profit Based on Its Advertising Spend


This example can be extended to other financial metrics like GDP, but we'll focus on profit and advertising spending for simplicity.

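Hypothetical Data

Advertising Spend (1000s)    Profit (1000s)
10                           20
20                           25
30                           35
40                           45
50                           55
60                           60
70                           65
80                           70
90                           75
100                          80

Here's the complete Python code using pandas and scikit-learn to fit a linear regression model to this data and visualize the results with matplotlib: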

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Step 1: Hypothetical data - advertising spend and profit, both in thousands
data = {
    'Advertising Spend (1000s)': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'Profit (1000s)': [20, 25, 35, 45, 55, 60, 65, 70, 75, 80],
}
df = pd.DataFrame(data)

# Step 2: Fit a simple linear regression (one feature, one target)
spend_col = 'Advertising Spend (1000s)'
profit_col = 'Profit (1000s)'
X = df[[spend_col]]  # one-column DataFrame keeps the feature matrix 2-D
y = df[profit_col]

# fit() returns the estimator itself, so construction and fitting can be chained
model = LinearRegression().fit(X, y)

# With a single feature, coef_ holds exactly one slope
slope, intercept = model.coef_[0], model.intercept_

print(f"Slope: {slope}")
print(f"Intercept: {intercept}")

# Fitted values at the observed spends; these trace the regression line
y_pred = model.predict(X)

# Step 3: Visualize - observed points in blue, fitted line in red
plt.scatter(df[spend_col], df[profit_col], color='blue', label='Data points')
plt.plot(df[spend_col], y_pred, color='red', linewidth=2, label='Regression line')

# Axis labels reuse the column names so the plot matches the data
plt.xlabel(spend_col)
plt.ylabel(profit_col)
plt.title('Advertising Spend vs. Profit Linear Regression')
plt.legend()

# Show the plot
plt.show()

Example 5: Predict a Company's Profit Based on Advertising Spend and R&D Spend

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from mpl_toolkits.mplot3d import Axes3D

# Step 1: Define the Real-Life Hypothetical Data using pandas
# NOTE: in this toy dataset R&D Spend equals Advertising Spend + 20 in every
# row, so the two features are perfectly collinear. The fitted plane still
# passes through the same predictions, but the split of the effect between
# the two coefficients is NOT uniquely determined and should not be
# interpreted feature by feature.
data = {
    'Advertising Spend (1000s)': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'R&D Spend (1000s)': [30, 40, 50, 60, 70, 80, 90, 100, 110, 120],
    'Profit (1000s)': [20, 25, 35, 45, 55, 60, 65, 70, 75, 80]
}

# Create a pandas DataFrame
df = pd.DataFrame(data)

# Step 2: Fit a Multiple Linear Regression Model
# Define the independent variables (features) and dependent variable
X = df[['Advertising Spend (1000s)', 'R&D Spend (1000s)']]
y = df['Profit (1000s)']

# Check whether the data can identify every parameter: the design matrix
# (a column of ones for the intercept plus the features) needs rank equal to
# the number of parameters for the least-squares solution to be unique.
design_matrix = np.column_stack([np.ones(len(X)), X.to_numpy(dtype=float)])
rank = np.linalg.matrix_rank(design_matrix)
n_params = design_matrix.shape[1]
if rank < n_params:
    print(
        f"Warning: design matrix has rank {rank} but the model has "
        f"{n_params} parameters; the coefficients are not uniquely determined."
    )

# Create the linear regression model
model = LinearRegression()

# Fit the model to the data
model.fit(X, y)

# Get the model parameters (one coefficient per feature, in column order)
coefficients = model.coef_
intercept = model.intercept_

print(f"Coefficients: {coefficients}")
print(f"Intercept: {intercept}")

# Predict profit values for the regression plane
y_pred = model.predict(X)

# Step 3: Visualize the Results
# Two features plus the target need three axes, so use a 3D subplot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Plot the original data points
ax.scatter(df['Advertising Spend (1000s)'], df['R&D Spend (1000s)'], df['Profit (1000s)'], color='blue', label='Data points')

# Create a meshgrid over the observed feature values and evaluate the fitted
# plane z = intercept + b0 * x + b1 * y at every grid point
xx, yy = np.meshgrid(df['Advertising Spend (1000s)'], df['R&D Spend (1000s)'])
zz = intercept + coefficients[0] * xx + coefficients[1] * yy

# Plot the regression plane (semi-transparent so the points stay visible)
ax.plot_surface(xx, yy, zz, color='red', alpha=0.5)

# Add labels and a legend
ax.set_xlabel('Advertising Spend (1000s)')
ax.set_ylabel('R&D Spend (1000s)')
ax.set_zlabel('Profit (1000s)')
ax.set_title('Advertising and R&D Spend vs. Profit Linear Regression')
ax.legend()

# Show the plot
plt.show()


Example 6: Sales Prediction for a Retail Store


import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Hypothetical retail records; each key becomes one DataFrame column
data = {
    'Advertising Spend ($1000s)': [10, 15, 20, 25, 30],
    'Promotions': [2, 3, 2, 4, 3],
    'Temperature (°C)': [25, 28, 30, 32, 35],
    'Sales ($1000s)': [30, 35, 40, 45, 50],
}
df = pd.DataFrame(data)

# Three predictor columns and one target column
feature_columns = ['Advertising Spend ($1000s)', 'Promotions', 'Temperature (°C)']
target_column = 'Sales ($1000s)'
X = df[feature_columns]
y = df[target_column]

# Fit the multiple linear regression (fit() returns the estimator itself)
model = LinearRegression().fit(X, y)

# One coefficient per feature, in the order of feature_columns, plus the intercept
coefficients, intercept = model.coef_, model.intercept_

print("Coefficients:", coefficients)
print("Intercept:", intercept)









0 Comments