Data Science Project
This example demonstrates building a complete data science project using PPM to manage both Python data processing/ML libraries and JavaScript visualization tools.
Project Overview
We’ll create a Stock Price Prediction System with the following components:
- Backend (Python): Fetches financial data, processes it, and trains ML models
- Frontend (JavaScript): Creates interactive dashboards and visualizations
- Data Pipeline: Automated data collection and model training
- API: RESTful service for predictions and data access
Project Setup
Initialize the Project
1
Create Project Directory
Copy
mkdir stock-predictor
cd stock-predictor
2
Initialize PPM Project
Copy
ppm init stock-predictor --template data-science
3
Add Dependencies
Copy
# Python data science stack
ppm add pandas numpy scikit-learn matplotlib seaborn
ppm add yfinance requests python-dotenv fastapi uvicorn
# JavaScript visualization tools
ppm add --js d3 chart.js plotly.js express cors
ppm add --js --dev vite @vitejs/plugin-vanilla
Project Configuration
Copy
# project.toml
[project]
name = "stock-predictor"
version = "1.0.0"
description = "ML-powered stock price prediction with interactive visualization"

[dependencies.python]
pandas = "^2.1.0"
numpy = "^1.24.0"
scikit-learn = "^1.3.0"
matplotlib = "^3.7.0"
seaborn = "^0.12.0"
yfinance = "^0.2.0"
requests = "^2.31.0"
python-dotenv = "^1.0.0"
fastapi = "^0.104.0"
uvicorn = "^0.24.0"

[dependencies.js]
d3 = "^7.8.0"
"chart.js" = "^4.4.0"
"plotly.js" = "^2.26.0"
express = "^4.18.0"
cors = "^2.8.0"

[dev-dependencies.python]
pytest = "^7.4.0"
jupyter = "^1.0.0"
ipykernel = "^6.25.0"

[dev-dependencies.js]
vite = "^5.0.0"
# NOTE(review): confirm this package exists on npm; a vanilla Vite project
# normally needs no framework plugin at all.
"@vitejs/plugin-vanilla" = "^1.0.0"

[scripts]
# Data pipeline
# Run as modules from the project root so `from src.models...` resolves;
# `python scripts/x.py` would put scripts/ (not the root) on sys.path.
fetch-data = "python -m scripts.fetch_data"
train-model = "python -m scripts.train_model"
pipeline = ["ppm run fetch-data", "ppm run train-model"]
# Servers
api = "uvicorn src.api:app --reload --host 0.0.0.0 --port 8000"
# index.html lives in web/, so Vite is given that directory as its root.
web = "vite web --host 0.0.0.0 --port 3000"
dev = ["ppm run api &", "ppm run web"]
# Production (used by the Dockerfile and the deployment instructions)
build = "vite build web"
start = "uvicorn src.api:app --host 0.0.0.0 --port 8000"
# Analysis
notebook = "jupyter lab"
analyze = "python scripts/analyze.py"
Project Structure
Create the following directory structure:
Copy
stock-predictor/
├── project.toml
├── data/
│ ├── raw/
│ ├── processed/
│ └── models/
├── src/
│ ├── api.py # FastAPI backend
│ ├── models/
│ │ ├── predictor.py
│ │ └── data_processor.py
│ └── utils/
│ └── helpers.py
├── scripts/
│ ├── fetch_data.py # Data collection
│ ├── train_model.py # Model training
│ └── analyze.py # Analysis scripts
├── web/
│ ├── index.html
│ ├── src/
│ │ ├── main.js
│ │ ├── charts.js
│ │ └── api.js
│ └── style.css
├── notebooks/
│ └── exploration.ipynb
└── tests/
├── test_models.py
└── test_api.py
Backend Implementation
Data Processor
Copy
# src/models/data_processor.py
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pickle
import os
class StockDataProcessor:
    """Download price history for one ticker and build model-ready sequences.

    The processor owns a scaler that is applied to the feature matrix; it can
    be persisted with ``save_scaler`` and restored with ``load_scaler``.
    """

    # Column order of the feature matrix built by ``prepare_features``.
    # 'Close' sits at index 3; code that writes predictions back into a
    # feature window depends on this ordering.
    FEATURE_COLUMNS = [
        'Open', 'High', 'Low', 'Close', 'Volume',
        'MA_7', 'MA_21', 'MA_50', 'RSI',
        'BB_upper', 'BB_lower', 'Volume_ratio',
        'Price_change', 'High_Low_ratio',
    ]

    def __init__(self, symbol, period="2y"):
        """Remember the ticker ``symbol`` and the history ``period`` to request."""
        self.symbol = symbol
        self.period = period
        self.scaler = MinMaxScaler()
        self.data = None  # populated by fetch_data()

    def fetch_data(self):
        """Fetch stock data from Yahoo Finance and cache it on ``self.data``."""
        ticker = yf.Ticker(self.symbol)
        self.data = ticker.history(period=self.period)
        return self.data

    def prepare_features(self, lookback=60, fit_scaler=True):
        """Build sliding-window feature sequences and their next-step targets.

        Args:
            lookback: Number of consecutive rows in each input window.
            fit_scaler: When True (default) the scaler is re-fit on this data.
                Pass False to reuse a scaler restored with ``load_scaler`` so
                inference uses the same scaling as training.

        Returns:
            ``(X, y, dates)`` where ``X`` has shape
            ``(n, lookback, len(FEATURE_COLUMNS))``, ``y`` holds the unscaled
            close that follows each window, and ``dates`` is the matching
            index slice. All three are empty when there are not more than
            ``lookback`` usable rows.
        """
        if self.data is None:
            self.fetch_data()

        df = self.data.copy()
        close = df['Close']

        # Moving averages
        for window in (7, 21, 50):
            df[f'MA_{window}'] = close.rolling(window=window).mean()

        # Relative Strength Index (RSI)
        delta = close.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / loss
        df['RSI'] = 100 - (100 / (1 + rs))

        # Bollinger Bands
        rolling_mean = close.rolling(window=20).mean()
        rolling_std = close.rolling(window=20).std()
        df['BB_upper'] = rolling_mean + (rolling_std * 2)
        df['BB_lower'] = rolling_mean - (rolling_std * 2)

        # Volume indicators
        df['Volume_MA'] = df['Volume'].rolling(window=20).mean()
        df['Volume_ratio'] = df['Volume'] / df['Volume_MA']

        # Price features
        df['Price_change'] = close.pct_change()
        df['High_Low_ratio'] = df['High'] / df['Low']

        # The rolling windows leave NaNs in the leading rows.
        df = df.dropna()

        # Not enough rows for even one window: return well-shaped empties
        # rather than letting the scaler raise on a zero-row matrix, so the
        # callers' ``len(X)`` checks can do their job.
        if len(df) <= lookback:
            empty_X = np.empty((0, lookback, len(self.FEATURE_COLUMNS)))
            return empty_X, np.empty((0,)), df.index[:0]

        features = df[self.FEATURE_COLUMNS].values
        targets = df['Close'].values

        if fit_scaler:
            features_scaled = self.scaler.fit_transform(features)
        else:
            features_scaled = self.scaler.transform(features)

        # Window i covers rows [i - lookback, i); its target is row i's close.
        X = np.stack([
            features_scaled[i - lookback:i]
            for i in range(lookback, len(features_scaled))
        ])
        y = targets[lookback:].copy()
        return X, y, df.index[lookback:]

    def save_scaler(self, path):
        """Pickle the fitted scaler to ``path``, creating parent directories."""
        directory = os.path.dirname(path)
        # dirname is '' for a bare filename and os.makedirs('') raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self.scaler, f)

    def load_scaler(self, path):
        """Load a fitted scaler from ``path``.

        SECURITY: unpickling executes arbitrary code; only load files this
        application wrote itself.
        """
        with open(path, 'rb') as f:
            self.scaler = pickle.load(f)
ML Model
Copy
# src/models/predictor.py
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import os
class StockPredictor:
    """Train several regressors on flattened windows and keep the best one."""

    def __init__(self):
        """Create the untrained candidate models."""
        self.models = {
            'random_forest': RandomForestRegressor(n_estimators=100, random_state=42),
            'gradient_boost': GradientBoostingRegressor(n_estimators=100, random_state=42),
            'linear': LinearRegression()
        }
        self.trained_models = {}
        self.best_model = None
        self.best_model_name = None  # key in ``self.models`` of the winner

    def train(self, X_train, y_train, X_test, y_test):
        """Train every candidate and select the one with the highest test R².

        Returns:
            Dict keyed by model name with ``model``, ``mse``, ``mae``, ``r2``
            and ``predictions`` entries.
        """
        # These estimators expect 2-D input, so each window is flattened.
        # The reshape does not depend on the model, so it is done once.
        X_train_flat = X_train.reshape(X_train.shape[0], -1)
        X_test_flat = X_test.reshape(X_test.shape[0], -1)

        results = {}
        for name, model in self.models.items():
            print(f"Training {name}...")
            model.fit(X_train_flat, y_train)
            y_pred = model.predict(X_test_flat)

            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            results[name] = {
                'model': model,
                'mse': mse,
                'mae': mae,
                'r2': r2,
                'predictions': y_pred
            }
            print(f"{name} - MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")

        # Select best model based on R² score
        best_name = max(results, key=lambda k: results[k]['r2'])
        self.best_model = results[best_name]['model']
        self.best_model_name = best_name
        self.trained_models = results
        print(f"\nBest model: {best_name}")
        return results

    def predict(self, X):
        """Predict with the best model.

        Args:
            X: Array whose first axis indexes samples; the remaining axes are
                flattened per sample.

        Raises:
            ValueError: If no model has been trained or loaded.
        """
        if self.best_model is None:
            raise ValueError("No trained model available")
        X_reshaped = X.reshape(X.shape[0], -1)
        return self.best_model.predict(X_reshaped)

    def predict_next(self, X_last, steps=5, target_index=3, scale_target=None):
        """Roll the model forward ``steps`` times from the window ``X_last``.

        After each prediction the window slides by one row: the oldest row is
        dropped and a new row is appended that carries the latest features
        forward with the predicted value written into the target column.

        Args:
            X_last: 2-D window ``(lookback, n_features)``; it is not modified.
            steps: Number of future values to predict.
            target_index: Column of the target within a feature row. The
                default 3 is the position of 'Close' in the feature order
                used by the data processor in this project.
            scale_target: Optional callable mapping a raw prediction into the
                (scaled) feature space before it is written into the window.
                Without it the raw prediction is written as-is, which is only
                an approximation when the features are scaled.

        Returns:
            1-D array with the ``steps`` predictions.
        """
        window = np.array(X_last, dtype=float, copy=True)
        predictions = []
        for _ in range(steps):
            pred = self.predict(window[np.newaxis, ...])[0]
            predictions.append(pred)

            # Simplified update: only the target column changes; the other
            # features are carried forward from the most recent row.
            next_row = window[-1].copy()
            next_row[target_index] = scale_target(pred) if scale_target else pred
            window = np.vstack([window[1:], next_row])
        return np.array(predictions)

    def save_model(self, path):
        """Pickle the best model to ``path``, creating parent directories."""
        directory = os.path.dirname(path)
        # dirname is '' for a bare filename and os.makedirs('') raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self.best_model, f)

    def load_model(self, path):
        """Load a trained model from ``path``.

        SECURITY: unpickling executes arbitrary code; only load files this
        application wrote itself.
        """
        with open(path, 'rb') as f:
            self.best_model = pickle.load(f)
FastAPI Backend
Copy
# src/api.py
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
# The app is served as ``src.api:app`` from the project root, so project
# modules must be imported through the ``src`` package (as the scripts do).
from src.models.data_processor import StockDataProcessor
from src.models.predictor import StockPredictor

# Upper bound for the forecast horizon accepted by /predict; every extra day
# costs one more model call.
MAX_PREDICTION_DAYS = 30

app = FastAPI(title="Stock Predictor API", version="1.0.0")

# Enable CORS.
# Wildcard origins must not be combined with credentials; the dashboard sends
# no cookies or auth headers, so credentials are switched off.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Process-wide state shared by the endpoints.
predictor = StockPredictor()
processor = StockDataProcessor("AAPL")

# Load a previously trained model and scaler if they exist.
if os.path.exists("data/models/best_model.pkl"):
    predictor.load_model("data/models/best_model.pkl")
if os.path.exists("data/models/scaler.pkl"):
    processor.load_scaler("data/models/scaler.pkl")


class PredictionRequest(BaseModel):
    """Body of POST /predict."""

    symbol: str
    # Number of future trading days to forecast.
    days: int = Field(default=5, ge=1, le=MAX_PREDICTION_DAYS)


class TrainRequest(BaseModel):
    """Body of POST /train."""

    symbol: str
    # History window passed to the data provider, e.g. "1y" or "2y".
    period: str = "2y"
@app.get("/")
async def root():
    """Return a small banner identifying the service and its version."""
    banner = {"message": "Stock Predictor API", "version": "1.0.0"}
    return banner
@app.get("/stocks/{symbol}/data")
async def get_stock_data(symbol: str, period: str = "1y"):
    """Get historical stock data as a list of daily OHLCV records.

    Raises:
        HTTPException: 400 when the download fails, 404 when the provider
            returns no rows for ``symbol``.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        # The download is blocking network I/O; run it off the event loop so
        # other requests keep being served.
        data = await run_in_threadpool(StockDataProcessor(symbol, period).fetch_data)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    if data.empty:
        raise HTTPException(status_code=404, detail=f"No data found for symbol '{symbol}'")

    # Rows with missing values cannot be serialised (int(NaN) raises).
    columns = ["Open", "High", "Low", "Close", "Volume"]
    clean = data.dropna(subset=columns)

    # Convert to JSON-serializable format
    result = [
        {
            "date": date.strftime("%Y-%m-%d"),
            "open": float(open_),
            "high": float(high),
            "low": float(low),
            "close": float(close),
            "volume": int(volume),
        }
        for date, open_, high, low, close, volume in zip(
            clean.index, *(clean[column] for column in columns)
        )
    ]
    return {"symbol": symbol, "data": result}
def _next_business_days(last_date, count):
    """Return the ``count`` Monday-Friday dates that follow ``last_date``.

    Market holidays are not taken into account.
    """
    days = []
    current = last_date
    while len(days) < count:
        current = current + timedelta(days=1)
        if current.weekday() < 5:
            days.append(current)
    return days


@app.post("/predict")
async def predict_stock(request: PredictionRequest):
    """Predict future stock prices for ``request.symbol``.

    Raises:
        HTTPException: 400 when no model is loaded, when there is too little
            history to build a window, or when data preparation fails.
    """
    from fastapi.concurrency import run_in_threadpool

    if predictor.best_model is None:
        raise HTTPException(status_code=400, detail="No trained model available")

    try:
        # NOTE(review): this fits a fresh scaler on the requested symbol
        # instead of reusing the one saved at training time — confirm intent.
        request_processor = StockDataProcessor(request.symbol)
        # Download and feature engineering are blocking; keep them off the loop.
        X, y, dates = await run_in_threadpool(request_processor.prepare_features)
        if len(X) == 0:
            raise HTTPException(status_code=400, detail="Insufficient data for prediction")

        # Use last sequence for prediction
        predictions = await run_in_threadpool(predictor.predict_next, X[-1], request.days)

        # One distinct weekday per prediction. Offsetting each day from the
        # last date and then skipping weekends produced duplicate dates.
        future_dates = [
            day.strftime("%Y-%m-%d")
            for day in _next_business_days(dates[-1], request.days)
        ]

        result = [
            {"date": date, "predicted_price": float(price)}
            for date, price in zip(future_dates, predictions)
        ]
        return {
            "symbol": request.symbol,
            "predictions": result,
            "last_actual_price": float(y[-1]),
            "prediction_confidence": "medium"  # Simplified
        }
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of re-wrapping them.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
@app.post("/train")
async def train_model(request: TrainRequest):
    """Train the prediction models on ``request.symbol`` and persist the best.

    Raises:
        HTTPException: 400 when fewer than 100 sequences are available,
            500 for any other failure during preparation or training.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        # Prepare data (blocking download + feature engineering).
        request_processor = StockDataProcessor(request.symbol, request.period)
        X, y, dates = await run_in_threadpool(request_processor.prepare_features)
        if len(X) < 100:
            raise HTTPException(status_code=400, detail="Insufficient data for training")

        # Chronological 80/20 split: the test set is the most recent data.
        split_idx = int(len(X) * 0.8)
        X_train, X_test = X[:split_idx], X[split_idx:]
        y_train, y_test = y[:split_idx], y[split_idx:]

        # Training is CPU-bound and slow; run it in a worker thread so the
        # event loop (and /health) stays responsive.
        results = await run_in_threadpool(
            predictor.train, X_train, y_train, X_test, y_test
        )

        # Save model and scaler
        predictor.save_model("data/models/best_model.pkl")
        request_processor.save_scaler("data/models/scaler.pkl")

        # Return training results
        return {
            "symbol": request.symbol,
            "training_samples": len(X_train),
            "test_samples": len(X_test),
            "model_performance": {
                name: {
                    "mse": float(metrics["mse"]),
                    "mae": float(metrics["mae"]),
                    "r2": float(metrics["r2"])
                }
                for name, metrics in results.items()
            }
        }
    except HTTPException:
        # Without this the 400 above would be reported as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/health")
async def health_check():
    """Report liveness, whether a model is loaded, and the server time."""
    has_model = predictor.best_model is not None
    checked_at = datetime.now().isoformat()
    return {"status": "healthy", "model_loaded": has_model, "timestamp": checked_at}
Frontend Implementation
HTML Structure
Copy
<!-- web/index.html -->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Stock Predictor Dashboard</title>
  <link rel="stylesheet" href="style.css">
  <!-- Pinned versions: "plotly-latest" is frozen at an old 1.x release, and an
       unversioned Chart.js URL can change major version without notice. -->
  <script src="https://cdn.plot.ly/plotly-2.26.0.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0"></script>
</head>
<body>
  <div class="container">
    <header>
      <h1>📈 Stock Predictor Dashboard</h1>
      <div class="controls">
        <input type="text" id="symbolInput" placeholder="Enter stock symbol (e.g., AAPL)" value="AAPL">
        <button id="loadDataBtn">Load Data</button>
        <button id="predictBtn">Predict</button>
        <button id="trainBtn">Train Model</button>
      </div>
    </header>
    <main>
      <div class="grid">
        <div class="card">
          <h2>Stock Price History</h2>
          <div id="priceChart"></div>
        </div>
        <div class="card">
          <h2>Predictions</h2>
          <div id="predictionChart"></div>
        </div>
        <div class="card">
          <h2>Technical Indicators</h2>
          <div id="indicatorsChart"></div>
        </div>
        <div class="card">
          <h2>Model Performance</h2>
          <!-- Chart.js renders into a canvas, unlike the Plotly targets above,
               and sizes it from a dedicated, relatively positioned parent. -->
          <div style="position: relative;">
            <canvas id="performanceChart"></canvas>
          </div>
        </div>
      </div>
      <div class="stats">
        <div class="stat-card">
          <h3>Current Price</h3>
          <div id="currentPrice">-</div>
        </div>
        <div class="stat-card">
          <h3>Predicted Change</h3>
          <div id="predictedChange">-</div>
        </div>
        <div class="stat-card">
          <h3>Confidence</h3>
          <div id="confidence">-</div>
        </div>
        <div class="stat-card">
          <h3>Last Updated</h3>
          <div id="lastUpdated">-</div>
        </div>
      </div>
    </main>
  </div>
  <script type="module" src="src/main.js"></script>
</body>
</html>
JavaScript Implementation
Copy
// web/src/api.js
const API_BASE = 'http://localhost:8000';
/**
 * Thin client for the Stock Predictor REST API.
 * Every method resolves with the parsed JSON body and rejects with an Error
 * whose message includes the server's `detail` text when one is available.
 */
export class StockAPI {
  /**
   * Perform a request and parse the JSON response.
   * @param {string} path - Path relative to API_BASE.
   * @param {string} failureLabel - Prefix for the error message on failure.
   * @param {RequestInit} [options] - Options forwarded to fetch().
   */
  async _request(path, failureLabel, options) {
    const response = await fetch(`${API_BASE}${path}`, options);
    if (!response.ok) {
      // statusText is often empty, while the API's error bodies carry the
      // useful text in a `detail` field.
      let detail = response.statusText;
      try {
        const body = await response.json();
        if (body && typeof body.detail === 'string') detail = body.detail;
      } catch (_err) {
        // Non-JSON error body: keep statusText.
      }
      throw new Error(`${failureLabel}: ${detail}`);
    }
    return response.json();
  }

  /** POST a JSON payload. */
  _post(path, payload, failureLabel) {
    return this._request(path, failureLabel, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload)
    });
  }

  /** Fetch historical OHLCV data for `symbol`. */
  async getStockData(symbol, period = '1y') {
    // The symbol comes from a free-text input; encode it so characters such
    // as "/" or "?" cannot alter the request path or query.
    const path = `/stocks/${encodeURIComponent(symbol)}/data?period=${encodeURIComponent(period)}`;
    return this._request(path, 'Failed to fetch data');
  }

  /** Request a `days`-long forecast for `symbol`. */
  async predict(symbol, days = 5) {
    return this._post('/predict', { symbol, days }, 'Prediction failed');
  }

  /** Train the models on `period` of history for `symbol`. */
  async trainModel(symbol, period = '2y') {
    return this._post('/train', { symbol, period }, 'Training failed');
  }

  /** Query service health; rejects on a non-2xx response. */
  async healthCheck() {
    return this._request('/health', 'Health check failed');
  }
}
Copy
// web/src/charts.js
/**
 * Renders the dashboard charts. Plotly charts draw into container elements by
 * id; the Chart.js volume chart draws into a canvas and is tracked in
 * `this.charts` so it can be destroyed before being redrawn.
 */
export class ChartManager {
  constructor() {
    this.charts = {};
  }

  /**
   * Line chart of closing prices, optionally overlaid with predictions.
   * @param {Array<{date: string, close: number}>} data
   * @param {Array<{date: string, predicted_price: number}>|null} predictions
   */
  createPriceChart(data, predictions = null) {
    const traces = [{
      x: data.map(d => d.date),
      y: data.map(d => d.close),
      type: 'scatter',
      mode: 'lines',
      name: 'Actual Price',
      line: { color: '#2196F3' }
    }];
    if (predictions) {
      traces.push({
        x: predictions.map(p => p.date),
        y: predictions.map(p => p.predicted_price),
        type: 'scatter',
        mode: 'lines+markers',
        name: 'Predictions',
        line: { color: '#FF5722', dash: 'dash' },
        marker: { size: 8 }
      });
    }
    const layout = {
      title: 'Stock Price History & Predictions',
      xaxis: { title: 'Date' },
      yaxis: { title: 'Price ($)' },
      showlegend: true
    };
    Plotly.newPlot('priceChart', traces, layout);
  }

  /** Candlestick chart of the OHLC series, drawn into #indicatorsChart. */
  createCandlestickChart(data) {
    const trace = {
      x: data.map(d => d.date),
      close: data.map(d => d.close),
      decreasing: { line: { color: '#FF5722' }},
      high: data.map(d => d.high),
      increasing: { line: { color: '#4CAF50' }},
      low: data.map(d => d.low),
      open: data.map(d => d.open),
      type: 'candlestick',
      xaxis: 'x',
      yaxis: 'y'
    };
    const layout = {
      title: 'Candlestick Chart',
      dragmode: 'zoom',
      showlegend: false,
      xaxis: {
        autorange: true,
        title: 'Date',
        type: 'date'
      },
      yaxis: {
        autorange: true,
        title: 'Price ($)',
        type: 'linear'
      }
    };
    Plotly.newPlot('indicatorsChart', [trace], layout);
  }

  /** Bar chart of the last 30 volume values, drawn into #performanceChart. */
  createVolumeChart(data) {
    const host = document.getElementById('performanceChart');
    // Chart.js can only draw on a canvas. If the target is a plain container,
    // reuse or create a canvas inside it.
    let canvas = host;
    if (!(host instanceof HTMLCanvasElement)) {
      canvas = host.querySelector('canvas');
      if (!canvas) {
        canvas = document.createElement('canvas');
        host.appendChild(canvas);
      }
    }
    // A canvas can host only one Chart instance at a time.
    if (this.charts.volume) {
      this.charts.volume.destroy();
    }
    const recent = data.slice(-30);
    this.charts.volume = new Chart(canvas, {
      type: 'bar',
      data: {
        labels: recent.map(d => d.date),
        datasets: [{
          label: 'Volume',
          data: recent.map(d => d.volume),
          backgroundColor: 'rgba(156, 39, 176, 0.3)',
          borderColor: 'rgba(156, 39, 176, 1)',
          borderWidth: 1
        }]
      },
      options: {
        responsive: true,
        scales: {
          y: {
            beginAtZero: true,
            title: {
              display: true,
              text: 'Volume'
            }
          }
        }
      }
    });
  }

  /**
   * Line chart of predicted prices, starting from the current price.
   * @param {Array<{date: string, predicted_price: number}>} predictions
   * @param {number} currentPrice
   */
  updatePredictionChart(predictions, currentPrice) {
    // Add current price as starting point
    const dates = ['Today', ...predictions.map(p => p.date)];
    const prices = [currentPrice, ...predictions.map(p => p.predicted_price)];
    const trace = {
      x: dates,
      y: prices,
      type: 'scatter',
      mode: 'lines+markers',
      name: 'Predictions',
      line: { color: '#FF9800', width: 3 },
      marker: { size: 8, color: '#FF9800' }
    };
    const layout = {
      title: 'Price Predictions',
      xaxis: { title: 'Date' },
      yaxis: { title: 'Predicted Price ($)' },
      showlegend: false
    };
    Plotly.newPlot('predictionChart', [trace], layout);
  }
}
Copy
// web/src/main.js
import { StockAPI } from './api.js';
import { ChartManager } from './charts.js';
/**
 * Page controller: wires the toolbar to the API client and the chart manager
 * and keeps the stat cards up to date.
 */
class StockDashboard {
  constructor() {
    this.api = new StockAPI();
    this.charts = new ChartManager();
    this.currentData = null;
    this.currentSymbol = 'AAPL';
    this.initializeEventListeners();
    this.loadInitialData();
  }

  /** Attach click/keyboard handlers to the toolbar controls. */
  initializeEventListeners() {
    document.getElementById('loadDataBtn').addEventListener('click', () => {
      this.loadStockData();
    });
    document.getElementById('predictBtn').addEventListener('click', () => {
      this.makePrediction();
    });
    document.getElementById('trainBtn').addEventListener('click', () => {
      this.trainModel();
    });
    document.getElementById('symbolInput').addEventListener('keypress', (e) => {
      if (e.key === 'Enter') {
        this.loadStockData();
      }
    });
  }

  /** Load the default symbol and show whether a model is available. */
  async loadInitialData() {
    try {
      await this.loadStockData();
      await this.checkModelStatus();
    } catch (error) {
      console.error('Failed to load initial data:', error);
      this.showError('Failed to load initial data');
    }
  }

  /** Fetch one year of history for the entered symbol and redraw the charts. */
  async loadStockData() {
    const symbol = document.getElementById('symbolInput').value.trim().toUpperCase();
    if (!symbol) return;
    this.currentSymbol = symbol;
    this.showLoading('Loading stock data...');
    try {
      const result = await this.api.getStockData(symbol, '1y');
      if (!result.data || result.data.length === 0) {
        // Nothing to plot; without this the stat update below would fail
        // with an unhelpful TypeError.
        throw new Error(`No data returned for ${symbol}`);
      }
      this.currentData = result.data;

      // Update charts
      this.charts.createPriceChart(this.currentData);
      this.charts.createCandlestickChart(this.currentData);
      this.charts.createVolumeChart(this.currentData);

      // Update stats
      const latestData = this.currentData[this.currentData.length - 1];
      document.getElementById('currentPrice').textContent = `$${latestData.close.toFixed(2)}`;
      document.getElementById('lastUpdated').textContent = new Date().toLocaleString();
      this.hideLoading();
    } catch (error) {
      console.error('Error loading stock data:', error);
      this.showError('Failed to load stock data');
    }
  }

  /** Request a 5-day forecast and update the prediction chart and stats. */
  async makePrediction() {
    if (!this.currentSymbol) return;
    this.showLoading('Making predictions...');
    try {
      const result = await this.api.predict(this.currentSymbol, 5);

      // Update prediction chart
      this.charts.updatePredictionChart(result.predictions, result.last_actual_price);

      // Update stats: change from the last actual close to the final prediction.
      const lastPrediction = result.predictions[result.predictions.length - 1];
      const change = ((lastPrediction.predicted_price - result.last_actual_price) / result.last_actual_price) * 100;
      const changeEl = document.getElementById('predictedChange');
      changeEl.textContent = `${change > 0 ? '+' : ''}${change.toFixed(2)}%`;
      // An exactly flat forecast gets neither colour.
      changeEl.className = change > 0 ? 'positive' : change < 0 ? 'negative' : '';
      document.getElementById('confidence').textContent = result.prediction_confidence;
      this.hideLoading();
    } catch (error) {
      console.error('Error making prediction:', error);
      this.showError('Failed to make prediction. Train the model first.');
    }
  }

  /** Train on two years of history and report the best model's score. */
  async trainModel() {
    if (!this.currentSymbol) return;
    this.showLoading('Training model... This may take a while.');
    try {
      const result = await this.api.trainModel(this.currentSymbol, '2y');

      // Show training results
      console.log('Training completed:', result);
      // Pick the entry with the highest R²; the first entry is merely the
      // first model that was trained, not the best one.
      const [bestName, bestMetrics] = Object.entries(result.model_performance)
        .reduce((best, entry) => (entry[1].r2 > best[1].r2 ? entry : best));
      alert(`Model trained successfully!\nTraining samples: ${result.training_samples}\nBest model (${bestName}) R²: ${bestMetrics.r2.toFixed(4)}`);
      this.hideLoading();
      // A model is now available; refresh the status badge.
      await this.checkModelStatus();
    } catch (error) {
      console.error('Error training model:', error);
      this.showError('Failed to train model');
    }
  }

  /** Show (or refresh) the model status badge in the toolbar. */
  async checkModelStatus() {
    try {
      const health = await this.api.healthCheck();
      const controls = document.querySelector('.controls');
      // Reuse the badge if it already exists so repeated calls do not stack.
      let statusEl = controls.querySelector('.status');
      if (!statusEl) {
        statusEl = document.createElement('div');
        statusEl.className = 'status';
        controls.appendChild(statusEl);
      }
      statusEl.textContent = `Model Status: ${health.model_loaded ? '✅ Loaded' : '❌ Not trained'}`;
    } catch (error) {
      console.error('Error checking model status:', error);
    }
  }

  /** Show the loading overlay with `message`, creating it if necessary. */
  showLoading(message) {
    const loadingEl = document.getElementById('loading') || document.createElement('div');
    loadingEl.id = 'loading';
    loadingEl.className = 'loading';
    loadingEl.textContent = message;
    document.body.appendChild(loadingEl);
  }

  /** Remove the loading overlay if it is present. */
  hideLoading() {
    const loadingEl = document.getElementById('loading');
    if (loadingEl) loadingEl.remove();
  }

  /** Dismiss the loading overlay and report `message` to the user. */
  showError(message) {
    this.hideLoading();
    alert(`Error: ${message}`);
  }
}
// Initialize dashboard when page loads.
// The instance is not stored: the constructor registers the event listeners
// and starts the initial data load.
document.addEventListener('DOMContentLoaded', () => {
  new StockDashboard();
});
CSS Styling
Copy
/* web/style.css */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
color: #333;
}
.container {
max-width: 1400px;
margin: 0 auto;
padding: 20px;
}
header {
background: white;
border-radius: 12px;
padding: 24px;
margin-bottom: 24px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
header h1 {
  font-size: 2.5rem;
  margin-bottom: 16px;
  /* Gradient-filled text: the background is clipped to the glyphs and the
     text fill is made transparent so the gradient shows through. */
  background: linear-gradient(45deg, #667eea, #764ba2);
  -webkit-background-clip: text;
  /* Standard property alongside the prefixed one. */
  background-clip: text;
  -webkit-text-fill-color: transparent;
}
.controls {
display: flex;
gap: 12px;
align-items: center;
flex-wrap: wrap;
}
.controls input {
padding: 12px 16px;
border: 2px solid #e1e5e9;
border-radius: 8px;
font-size: 14px;
min-width: 200px;
}
.controls button {
padding: 12px 24px;
background: linear-gradient(45deg, #667eea, #764ba2);
color: white;
border: none;
border-radius: 8px;
font-weight: 600;
cursor: pointer;
transition: transform 0.2s;
}
.controls button:hover {
transform: translateY(-2px);
}
.status {
margin-left: auto;
padding: 8px 16px;
background: #f8f9fa;
border-radius: 20px;
font-size: 14px;
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(600px, 1fr));
gap: 24px;
margin-bottom: 24px;
}
.card {
background: white;
border-radius: 12px;
padding: 24px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.card h2 {
margin-bottom: 16px;
color: #2c3e50;
font-size: 1.25rem;
}
.stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 16px;
}
.stat-card {
background: white;
border-radius: 8px;
padding: 20px;
text-align: center;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
.stat-card h3 {
font-size: 14px;
color: #666;
margin-bottom: 8px;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.stat-card div {
font-size: 24px;
font-weight: bold;
color: #2c3e50;
}
.positive {
color: #4CAF50 !important;
}
.negative {
color: #F44336 !important;
}
.loading {
position: fixed;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
background: rgba(0, 0, 0, 0.8);
color: white;
padding: 20px 40px;
border-radius: 8px;
font-size: 16px;
z-index: 1000;
}
@media (max-width: 768px) {
.grid {
grid-template-columns: 1fr;
}
.controls {
flex-direction: column;
align-items: stretch;
}
.controls input,
.controls button {
width: 100%;
}
}
Data Collection Scripts
Fetch Data Script
Copy
# scripts/fetch_data.py
import os
import pandas as pd
from src.models.data_processor import StockDataProcessor
def fetch_stock_data(symbols, period="2y"):
    """Fetch data for multiple stock symbols and save each as CSV in data/raw.

    A failure for one symbol is reported and does not stop the others.

    Args:
        symbols: Iterable of ticker symbols.
        period: History window passed to the data processor.

    Returns:
        List of the symbols whose data was saved.
    """
    os.makedirs("data/raw", exist_ok=True)
    saved = []
    for symbol in symbols:
        print(f"Fetching data for {symbol}...")
        try:
            processor = StockDataProcessor(symbol, period)
            data = processor.fetch_data()
            # An empty frame would otherwise be written out and reported as a
            # success, leaving a header-only CSV behind.
            if data is None or data.empty:
                raise ValueError("no data returned")
            # Save raw data
            data.to_csv(f"data/raw/{symbol}_{period}.csv")
            saved.append(symbol)
            print(f"✅ Saved data for {symbol}")
        except Exception as e:
            print(f"❌ Error fetching {symbol}: {e}")
    return saved
# Script entry point: download the default watch list with the default period.
if __name__ == "__main__":
    # Popular stocks to fetch
    symbols = ["AAPL", "GOOGL", "MSFT", "AMZN", "TSLA", "NVDA", "META"]
    fetch_stock_data(symbols)
Training Script
Copy
# scripts/train_model.py
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from src.models.data_processor import StockDataProcessor
from src.models.predictor import StockPredictor
def train_on_symbol(symbol, period="2y"):
    """Fit the candidate models on one ticker and persist the winner.

    Args:
        symbol: Ticker to train on.
        period: History window passed to the data processor.

    Returns:
        The per-model results dict from ``StockPredictor.train``, or ``None``
        when fewer than 100 sequences are available.
    """
    print(f"Training model on {symbol}...")

    data_source = StockDataProcessor(symbol, period)
    X, y, dates = data_source.prepare_features()
    has_enough_samples = len(X) >= 100
    if not has_enough_samples:
        print(f"❌ Insufficient data for {symbol}")
        return None

    # shuffle=False keeps the split chronological: the last 20% is the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    model_bank = StockPredictor()
    results = model_bank.train(X_train, y_train, X_test, y_test)

    # Persist the best model together with the scaler it was trained with.
    model_dir = "data/models"
    os.makedirs(model_dir, exist_ok=True)
    model_bank.save_model(f"{model_dir}/{symbol}_model.pkl")
    data_source.save_scaler(f"{model_dir}/{symbol}_scaler.pkl")
    return results
# Script entry point: train on the default symbol and print each model's R².
if __name__ == "__main__":
    symbol = "AAPL"  # Default symbol
    results = train_on_symbol(symbol)
    # train_on_symbol returns None when there is too little data.
    if results:
        print("\n🎉 Training completed!")
        for name, metrics in results.items():
            print(f"{name}: R² = {metrics['r2']:.4f}")
Jupyter Notebook
Copy
# notebooks/exploration.ipynb
"""
Stock Analysis Exploration
=========================
This notebook demonstrates exploratory data analysis
for the stock prediction project.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.models.data_processor import StockDataProcessor
# Plot styling
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Download two years of AAPL history
processor = StockDataProcessor("AAPL", "2y")
data = processor.fetch_data()

# Dataset overview
first_day, last_day = data.index.min(), data.index.max()
missing_total = data.isnull().sum().sum()
print("Dataset Info:")
print(f"Shape: {data.shape}")
print(f"Date range: {first_day} to {last_day}")
print(f"Missing values: {missing_total}")

# 2x2 overview figure: price, volume, and their distributions
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(price_ax, volume_ax), (price_hist_ax, returns_hist_ax) = axes

# Closing price over time
price_ax.plot(data.index, data['Close'])
price_ax.set_title('Apple Stock Price Over Time')
price_ax.set_ylabel('Price ($)')

# Traded volume over time
volume_ax.plot(data.index, data['Volume'], color='orange')
volume_ax.set_title('Trading Volume Over Time')
volume_ax.set_ylabel('Volume')

# Histogram of closing prices
price_hist_ax.hist(data['Close'], bins=50, alpha=0.7)
price_hist_ax.set_title('Price Distribution')
price_hist_ax.set_xlabel('Price ($)')

# Histogram of daily percentage returns (the first row has no return)
returns = data['Close'].pct_change().dropna()
returns_hist_ax.hist(returns, bins=50, alpha=0.7, color='green')
returns_hist_ax.set_title('Daily Returns Distribution')
returns_hist_ax.set_xlabel('Return')

plt.tight_layout()
plt.show()

# Pairwise correlation of the raw OHLCV columns
features = ['Open', 'High', 'Low', 'Close', 'Volume']
correlation_matrix = data[features].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Feature Correlation Matrix')
plt.show()
Running the Project
Development Mode
Copy
# Install dependencies
ppm install
# Start development servers
ppm run dev
# Or start individually
ppm run api # Backend API (port 8000)
ppm run web # Frontend (port 3000)
Data Pipeline
Copy
# Fetch fresh data
ppm run fetch-data
# Train the model
ppm run train-model
# Run complete pipeline
ppm run pipeline
Analysis
Copy
# Start Jupyter Lab
ppm run notebook
# Run analysis script
ppm run analyze
Deployment
Docker Setup
Copy
# Dockerfile
FROM python:3.11-slim

# Install Node.js.
# The slim base image ships without curl, which both installer scripts below
# need, so it is installed first. Node 20 replaces the end-of-life 18 line.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Install PPM
RUN curl -fsSL https://install.ppm.dev/install.sh | sh

WORKDIR /app

# Copy the manifests first so the dependency layer is cached across code
# changes. The wildcard makes package.json optional.
COPY project.toml package*.json ./
RUN ppm install

COPY . .

# Build frontend (requires a `build` script in project.toml)
RUN ppm run build

# Expose ports
EXPOSE 8000 3000

# Start application
# NOTE(review): `dev` starts auto-reloading development servers; switch to a
# production script for real deployments.
CMD ["ppm", "run", "dev"]
Production Deployment
Copy
# Build production assets
ppm run build
# Start production server
ppm run start --env production