Linear Regression
Description: Models the linear relationship between input features and a continuous target by fitting a line (a hyperplane when there are several features) that minimizes the squared prediction errors.
Use Cases:
- Real Estate: Predicting house prices based on size and location.
- Finance: Forecasting stock trends from historical data.
Best Library: scikit-learn
View Python Script
import os
import pandas as pd
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from azure.storage.blob import BlobServiceClient
# Azure Variables (Modify for actual use)
# Each credential can be supplied through an environment variable so the real
# account key never has to be written into this file. The placeholder
# literals are kept as defaults, so behaviour is unchanged when the variables
# are not set.
azure_account_name = os.environ.get('AZURE_STORAGE_ACCOUNT_NAME', 'your_storage_account_name')
azure_account_key = os.environ.get('AZURE_STORAGE_ACCOUNT_KEY', 'your_storage_account_key')
container_name = os.environ.get('AZURE_STORAGE_CONTAINER_NAME', 'your_container_name')
# Training data is expected as blobs named "<blob_prefix><n>.csv" for
# n = 1..num_parts.
blob_prefix = 'train_data_part_'
num_parts = 10

# Connect to Azure Blob Storage
connect_str = (
    "DefaultEndpointsProtocol=https;"
    f"AccountName={azure_account_name};"
    f"AccountKey={azure_account_key};"
    "EndpointSuffix=core.windows.net"
)
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client(container_name)
# Initialize Model (Incremental with SGD)
# Both estimators support partial_fit, so each CSV part can be processed on
# its own instead of loading the whole dataset at once.
model = SGDRegressor()
scaler = StandardScaler()

# Train Sequentially
for part in range(1, num_parts + 1):
    blob_name = f"{blob_prefix}{part}.csv"
    blob_client = container_client.get_blob_client(blob_name)
    temp_file = f"temp_data_part_{part}.csv"

    try:
        # Download the blob to a local temporary file, then parse it.
        with open(temp_file, "wb") as f:
            download_stream = blob_client.download_blob()
            f.write(download_stream.readall())
        df = pd.read_csv(temp_file)
    finally:
        # Always clean up, even when the download or the CSV parse fails;
        # the data is already in memory once read_csv has returned.
        if os.path.exists(temp_file):
            os.remove(temp_file)

    # Every part must contain a 'target' column; all other columns are
    # treated as features.
    X = df.drop('target', axis=1).values
    y = df['target'].values

    # Scale and Partial Fit
    # NOTE: the scaler is updated before transforming the same part, so a
    # part is scaled with the statistics of the parts seen so far (including
    # itself), not with the statistics of the full dataset.
    scaler.partial_fit(X)
    X_scaled = scaler.transform(X)
    model.partial_fit(X_scaled, y)
# Save Model
import joblib

# The regressor was trained on standardized features, so the fitted scaler
# is persisted alongside it: new raw inputs must go through
# scaler.transform() before model.predict(). The model file name and
# contents are unchanged.
joblib.dump(model, 'linear_regression_model.pkl')
joblib.dump(scaler, 'linear_regression_scaler.pkl')
print("Training complete. Model saved.")