Data Science Project

This example demonstrates building a complete data science project using PPM to manage both Python data processing/ML libraries and JavaScript visualization tools.

Project Overview

We’ll create a Stock Price Prediction System with the following components:
  • Backend (Python): Fetches financial data, processes it, and trains ML models
  • Frontend (JavaScript): Creates interactive dashboards and visualizations
  • Data Pipeline: Automated data collection and model training
  • API: RESTful service for predictions and data access

Project Setup

Initialize the Project

1. Create Project Directory

mkdir stock-predictor
cd stock-predictor

2. Initialize PPM Project

ppm init stock-predictor --template data-science

3. Add Dependencies

# Python data science stack
ppm add pandas numpy scikit-learn matplotlib seaborn
ppm add yfinance requests python-dotenv fastapi uvicorn

# JavaScript visualization tools  
ppm add --js d3 chart.js plotly.js express cors
ppm add --js --dev vite @vitejs/plugin-vanilla

Project Configuration

# project.toml
[project]
name = "stock-predictor"
version = "1.0.0"
description = "ML-powered stock price prediction with interactive visualization"

[dependencies.python]
pandas = "^2.1.0"
numpy = "^1.24.0"
scikit-learn = "^1.3.0"
matplotlib = "^3.7.0"
seaborn = "^0.12.0"
yfinance = "^0.2.0"
requests = "^2.31.0"
python-dotenv = "^1.0.0"
fastapi = "^0.104.0"
uvicorn = "^0.24.0"

[dependencies.js]
d3 = "^7.8.0"
"chart.js" = "^4.4.0"
"plotly.js" = "^2.26.0"
express = "^4.18.0"
cors = "^2.8.0"

[dev-dependencies.python]
pytest = "^7.4.0"
jupyter = "^1.0.0"
ipykernel = "^6.25.0"

[dev-dependencies.js]
vite = "^5.0.0"
"@vitejs/plugin-vanilla" = "^1.0.0"

[scripts]
# Data pipeline
fetch-data = "python scripts/fetch_data.py"
train-model = "python scripts/train_model.py"
pipeline = ["ppm run fetch-data", "ppm run train-model"]

# Servers
api = "uvicorn src.api:app --reload --host 0.0.0.0 --port 8000"
web = "vite --host 0.0.0.0 --port 3000"
dev = ["ppm run api &", "ppm run web"]

# Analysis
notebook = "jupyter lab"
analyze = "python scripts/analyze.py"

# Production (referenced by the Dockerfile and deployment steps below)
build = "vite build"
start = "uvicorn src.api:app --host 0.0.0.0 --port 8000"

Project Structure

Create the following directory structure:
stock-predictor/
├── project.toml
├── data/
│   ├── raw/
│   ├── processed/
│   └── models/
├── src/
│   ├── api.py          # FastAPI backend
│   ├── models/
│   │   ├── predictor.py
│   │   └── data_processor.py
│   └── utils/
│       └── helpers.py
├── scripts/
│   ├── fetch_data.py   # Data collection
│   ├── train_model.py  # Model training
│   └── analyze.py      # Analysis scripts
├── web/
│   ├── index.html
│   ├── src/
│   │   ├── main.js
│   │   ├── charts.js
│   │   └── api.js
│   └── style.css
├── notebooks/
│   └── exploration.ipynb
└── tests/
    ├── test_models.py
    └── test_api.py
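
One way to scaffold these directories from the project root (the file contents follow in the sections below):

mkdir -p data/raw data/processed data/models
mkdir -p src/models src/utils scripts web/src notebooks tests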

Backend Implementation

Data Processor

# src/models/data_processor.py
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pickle
import os

class StockDataProcessor:
    def __init__(self, symbol, period="2y"):
        self.symbol = symbol
        self.period = period
        self.scaler = MinMaxScaler()
        self.data = None
        
    def fetch_data(self):
        """Fetch stock data from Yahoo Finance"""
        ticker = yf.Ticker(self.symbol)
        self.data = ticker.history(period=self.period)
        return self.data
    
    def prepare_features(self, lookback=60):
        """Prepare features for ML model"""
        if self.data is None:
            self.fetch_data()
            
        # Calculate technical indicators
        df = self.data.copy()
        
        # Moving averages
        df['MA_7'] = df['Close'].rolling(window=7).mean()
        df['MA_21'] = df['Close'].rolling(window=21).mean()
        df['MA_50'] = df['Close'].rolling(window=50).mean()
        
        # Relative Strength Index (RSI)
        delta = df['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / loss
        df['RSI'] = 100 - (100 / (1 + rs))
        
        # Bollinger Bands
        rolling_mean = df['Close'].rolling(window=20).mean()
        rolling_std = df['Close'].rolling(window=20).std()
        df['BB_upper'] = rolling_mean + (rolling_std * 2)
        df['BB_lower'] = rolling_mean - (rolling_std * 2)
        
        # Volume indicators
        df['Volume_MA'] = df['Volume'].rolling(window=20).mean()
        df['Volume_ratio'] = df['Volume'] / df['Volume_MA']
        
        # Price features
        df['Price_change'] = df['Close'].pct_change()
        df['High_Low_ratio'] = df['High'] / df['Low']
        
        # Remove NaN values
        df = df.dropna()
        
        # Build rolling lookback windows of features
        feature_columns = [
            'Open', 'High', 'Low', 'Close', 'Volume',
            'MA_7', 'MA_21', 'MA_50', 'RSI', 
            'BB_upper', 'BB_lower', 'Volume_ratio',
            'Price_change', 'High_Low_ratio'
        ]
        
        features = df[feature_columns].values
        targets = df['Close'].values
        
        # Scale features
        features_scaled = self.scaler.fit_transform(features)
        
        # Create sequences
        X, y = [], []
        for i in range(lookback, len(features_scaled)):
            X.append(features_scaled[i-lookback:i])
            y.append(targets[i])
            
        return np.array(X), np.array(y), df.index[lookback:]
    
    def save_scaler(self, path):
        """Save the fitted scaler"""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self.scaler, f)
    
    def load_scaler(self, path):
        """Load a fitted scaler"""
        with open(path, 'rb') as f:
            self.scaler = pickle.load(f)
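
A quick sanity check of the processor from a Python shell at the project root (assumes network access for yfinance; the symbol is just an example):

from src.models.data_processor import StockDataProcessor

processor = StockDataProcessor("AAPL", period="1y")
X, y, dates = processor.prepare_features(lookback=60)
print(X.shape, y.shape)  # e.g. (n_samples, 60, 14) feature windows and matching close prices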

ML Model

# src/models/predictor.py
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import os

class StockPredictor:
    def __init__(self):
        self.models = {
            'random_forest': RandomForestRegressor(n_estimators=100, random_state=42),
            'gradient_boost': GradientBoostingRegressor(n_estimators=100, random_state=42),
            'linear': LinearRegression()
        }
        self.trained_models = {}
        self.best_model = None
        
    def train(self, X_train, y_train, X_test, y_test):
        """Train multiple models and select the best one"""
        results = {}
        
        for name, model in self.models.items():
            print(f"Training {name}...")
            
            # Flatten each lookback window for the scikit-learn models
            X_train_reshaped = X_train.reshape(X_train.shape[0], -1)
            X_test_reshaped = X_test.reshape(X_test.shape[0], -1)
            
            # Train model
            model.fit(X_train_reshaped, y_train)
            
            # Make predictions
            y_pred = model.predict(X_test_reshaped)
            
            # Calculate metrics
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            
            results[name] = {
                'model': model,
                'mse': mse,
                'mae': mae,
                'r2': r2,
                'predictions': y_pred
            }
            
            print(f"{name} - MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")
        
        # Select best model based on R² score
        best_name = max(results.keys(), key=lambda k: results[k]['r2'])
        self.best_model = results[best_name]['model']
        self.trained_models = results
        
        print(f"\nBest model: {best_name}")
        return results
    
    def predict(self, X):
        """Make predictions using the best model"""
        if self.best_model is None:
            raise ValueError("No trained model available")
        
        X_reshaped = X.reshape(X.shape[0], -1)
        return self.best_model.predict(X_reshaped)
    
    def predict_next(self, X_last, steps=5):
        """Predict next N steps"""
        predictions = []
        current_sequence = X_last.copy()
        
        for _ in range(steps):
            pred = self.predict(current_sequence.reshape(1, *current_sequence.shape))[0]
            predictions.append(pred)
            
            # Update sequence (simplified - in practice you'd recompute every derived
            # feature, and pred is in price units while the stored features are scaled)
            current_sequence = np.roll(current_sequence, -1, axis=0)
            current_sequence[-1, 3] = pred  # column 3 is 'Close' in feature_columns
            
        return np.array(predictions)
    
    def save_model(self, path):
        """Save the best model"""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self.best_model, f)
    
    def load_model(self, path):
        """Load a trained model"""
        with open(path, 'rb') as f:
            self.best_model = pickle.load(f)
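
Putting the two classes together for a short offline test (a sketch, using the data processor above and a simple 80/20 chronological split):

from src.models.data_processor import StockDataProcessor
from src.models.predictor import StockPredictor

processor = StockDataProcessor("AAPL", period="2y")
X, y, _ = processor.prepare_features()

split = int(len(X) * 0.8)
predictor = StockPredictor()
predictor.train(X[:split], y[:split], X[split:], y[split:])

# Forecast the next 5 closes from the most recent window
print(predictor.predict_next(X[-1], steps=5))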

FastAPI Backend

# src/api.py
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os

from src.models.data_processor import StockDataProcessor
from src.models.predictor import StockPredictor

app = FastAPI(title="Stock Predictor API", version="1.0.0")

# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global variables
predictor = StockPredictor()
processor = StockDataProcessor("AAPL")

# Load models if they exist
if os.path.exists("data/models/best_model.pkl"):
    predictor.load_model("data/models/best_model.pkl")
if os.path.exists("data/models/scaler.pkl"):
    processor.load_scaler("data/models/scaler.pkl")

class PredictionRequest(BaseModel):
    symbol: str
    days: int = 5

class TrainRequest(BaseModel):
    symbol: str
    period: str = "2y"

@app.get("/")
async def root():
    return {"message": "Stock Predictor API", "version": "1.0.0"}

@app.get("/stocks/{symbol}/data")
async def get_stock_data(symbol: str, period: str = "1y"):
    """Get historical stock data"""
    try:
        processor = StockDataProcessor(symbol, period)
        data = processor.fetch_data()
        
        # Convert to JSON-serializable format
        result = []
        for date, row in data.iterrows():
            result.append({
                "date": date.strftime("%Y-%m-%d"),
                "open": float(row["Open"]),
                "high": float(row["High"]),
                "low": float(row["Low"]),
                "close": float(row["Close"]),
                "volume": int(row["Volume"])
            })
        
        return {"symbol": symbol, "data": result}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

@app.post("/predict")
async def predict_stock(request: PredictionRequest):
    """Predict future stock prices"""
    try:
        if predictor.best_model is None:
            raise HTTPException(status_code=400, detail="No trained model available")
        
        # Get recent data for prediction
        processor = StockDataProcessor(request.symbol)
        X, y, dates = processor.prepare_features()
        
        if len(X) == 0:
            raise HTTPException(status_code=400, detail="Insufficient data for prediction")
        
        # Use last sequence for prediction
        last_sequence = X[-1]
        predictions = predictor.predict_next(last_sequence, request.days)
        
        # Generate future dates
        last_date = dates[-1]
        future_dates = []
        for i in range(1, request.days + 1):
            future_date = last_date + timedelta(days=i)
            # Skip weekends (simplified)
            while future_date.weekday() >= 5:
                future_date += timedelta(days=1)
            future_dates.append(future_date.strftime("%Y-%m-%d"))
        
        result = []
        for date, price in zip(future_dates, predictions):
            result.append({
                "date": date,
                "predicted_price": float(price)
            })
        
        return {
            "symbol": request.symbol,
            "predictions": result,
            "last_actual_price": float(y[-1]),
            "prediction_confidence": "medium"  # Simplified
        }
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

@app.post("/train")
async def train_model(request: TrainRequest):
    """Train the prediction model"""
    try:
        # Prepare data
        processor = StockDataProcessor(request.symbol, request.period)
        X, y, dates = processor.prepare_features()
        
        if len(X) < 100:
            raise HTTPException(status_code=400, detail="Insufficient data for training")
        
        # Split data
        split_idx = int(len(X) * 0.8)
        X_train, X_test = X[:split_idx], X[split_idx:]
        y_train, y_test = y[:split_idx], y[split_idx:]
        
        # Train models
        results = predictor.train(X_train, y_train, X_test, y_test)
        
        # Save model and scaler
        predictor.save_model("data/models/best_model.pkl")
        processor.save_scaler("data/models/scaler.pkl")
        
        # Return training results
        return {
            "symbol": request.symbol,
            "training_samples": len(X_train),
            "test_samples": len(X_test),
            "model_performance": {
                name: {
                    "mse": float(metrics["mse"]),
                    "mae": float(metrics["mae"]),
                    "r2": float(metrics["r2"])
                }
                for name, metrics in results.items()
            }
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {
        "status": "healthy",
        "model_loaded": predictor.best_model is not None,
        "timestamp": datetime.now().isoformat()
    }
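
With the API running (ppm run api), the endpoints can be exercised directly; for example:

# Health check
curl http://localhost:8000/health

# Historical data
curl "http://localhost:8000/stocks/AAPL/data?period=1y"

# Train, then predict
curl -X POST http://localhost:8000/train -H "Content-Type: application/json" -d '{"symbol": "AAPL", "period": "2y"}'
curl -X POST http://localhost:8000/predict -H "Content-Type: application/json" -d '{"symbol": "AAPL", "days": 5}'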

Frontend Implementation

HTML Structure

<!-- web/index.html -->
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Stock Predictor Dashboard</title>
    <link rel="stylesheet" href="style.css">
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
    <div class="container">
        <header>
            <h1>📈 Stock Predictor Dashboard</h1>
            <div class="controls">
                <input type="text" id="symbolInput" placeholder="Enter stock symbol (e.g., AAPL)" value="AAPL">
                <button id="loadDataBtn">Load Data</button>
                <button id="predictBtn">Predict</button>
                <button id="trainBtn">Train Model</button>
            </div>
        </header>

        <main>
            <div class="grid">
                <div class="card">
                    <h2>Stock Price History</h2>
                    <div id="priceChart"></div>
                </div>

                <div class="card">
                    <h2>Predictions</h2>
                    <div id="predictionChart"></div>
                </div>

                <div class="card">
                    <h2>Technical Indicators</h2>
                    <div id="indicatorsChart"></div>
                </div>

                <div class="card">
                    <h2>Model Performance</h2>
                    <div id="performanceChart"></div>
                </div>
            </div>

            <div class="stats">
                <div class="stat-card">
                    <h3>Current Price</h3>
                    <div id="currentPrice">-</div>
                </div>
                <div class="stat-card">
                    <h3>Predicted Change</h3>
                    <div id="predictedChange">-</div>
                </div>
                <div class="stat-card">
                    <h3>Confidence</h3>
                    <div id="confidence">-</div>
                </div>
                <div class="stat-card">
                    <h3>Last Updated</h3>
                    <div id="lastUpdated">-</div>
                </div>
            </div>
        </main>
    </div>

    <script type="module" src="src/main.js"></script>
</body>
</html>

JavaScript Implementation

// web/src/api.js
const API_BASE = 'http://localhost:8000';

export class StockAPI {
    async getStockData(symbol, period = '1y') {
        const response = await fetch(`${API_BASE}/stocks/${symbol}/data?period=${period}`);
        if (!response.ok) throw new Error(`Failed to fetch data: ${response.statusText}`);
        return response.json();
    }

    async predict(symbol, days = 5) {
        const response = await fetch(`${API_BASE}/predict`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ symbol, days })
        });
        if (!response.ok) throw new Error(`Prediction failed: ${response.statusText}`);
        return response.json();
    }

    async trainModel(symbol, period = '2y') {
        const response = await fetch(`${API_BASE}/train`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ symbol, period })
        });
        if (!response.ok) throw new Error(`Training failed: ${response.statusText}`);
        return response.json();
    }

    async healthCheck() {
        const response = await fetch(`${API_BASE}/health`);
        return response.json();
    }
}

// web/src/charts.js
export class ChartManager {
    constructor() {
        this.charts = {};
    }

    createPriceChart(data, predictions = null) {
        const trace1 = {
            x: data.map(d => d.date),
            y: data.map(d => d.close),
            type: 'scatter',
            mode: 'lines',
            name: 'Actual Price',
            line: { color: '#2196F3' }
        };

        const traces = [trace1];

        if (predictions) {
            const trace2 = {
                x: predictions.map(p => p.date),
                y: predictions.map(p => p.predicted_price),
                type: 'scatter',
                mode: 'lines+markers',
                name: 'Predictions',
                line: { color: '#FF5722', dash: 'dash' },
                marker: { size: 8 }
            };
            traces.push(trace2);
        }

        const layout = {
            title: 'Stock Price History & Predictions',
            xaxis: { title: 'Date' },
            yaxis: { title: 'Price ($)' },
            showlegend: true
        };

        Plotly.newPlot('priceChart', traces, layout);
    }

    createCandlestickChart(data) {
        const trace = {
            x: data.map(d => d.date),
            close: data.map(d => d.close),
            decreasing: { line: { color: '#FF5722' }},
            high: data.map(d => d.high),
            increasing: { line: { color: '#4CAF50' }},
            low: data.map(d => d.low),
            open: data.map(d => d.open),
            type: 'candlestick',
            xaxis: 'x',
            yaxis: 'y'
        };

        const layout = {
            title: 'Candlestick Chart',
            dragmode: 'zoom',
            showlegend: false,
            xaxis: {
                autorange: true,
                title: 'Date',
                type: 'date'
            },
            yaxis: {
                autorange: true,
                title: 'Price ($)',
                type: 'linear'
            }
        };

        Plotly.newPlot('indicatorsChart', [trace], layout);
    }

    createVolumeChart(data) {
        const ctx = document.getElementById('performanceChart');
        
        if (this.charts.volume) {
            this.charts.volume.destroy();
        }

        this.charts.volume = new Chart(ctx, {
            type: 'bar',
            data: {
                labels: data.slice(-30).map(d => d.date),
                datasets: [{
                    label: 'Volume',
                    data: data.slice(-30).map(d => d.volume),
                    backgroundColor: 'rgba(156, 39, 176, 0.3)',
                    borderColor: 'rgba(156, 39, 176, 1)',
                    borderWidth: 1
                }]
            },
            options: {
                responsive: true,
                scales: {
                    y: {
                        beginAtZero: true,
                        title: {
                            display: true,
                            text: 'Volume'
                        }
                    }
                }
            }
        });
    }

    updatePredictionChart(predictions, currentPrice) {
        const dates = predictions.map(p => p.date);
        const prices = predictions.map(p => p.predicted_price);
        
        // Add current price as starting point
        dates.unshift('Today');
        prices.unshift(currentPrice);

        const trace = {
            x: dates,
            y: prices,
            type: 'scatter',
            mode: 'lines+markers',
            name: 'Predictions',
            line: { color: '#FF9800', width: 3 },
            marker: { size: 8, color: '#FF9800' }
        };

        const layout = {
            title: 'Price Predictions',
            xaxis: { title: 'Date' },
            yaxis: { title: 'Predicted Price ($)' },
            showlegend: false
        };

        Plotly.newPlot('predictionChart', [trace], layout);
    }
}

// web/src/main.js
import { StockAPI } from './api.js';
import { ChartManager } from './charts.js';

class StockDashboard {
    constructor() {
        this.api = new StockAPI();
        this.charts = new ChartManager();
        this.currentData = null;
        this.currentSymbol = 'AAPL';
        
        this.initializeEventListeners();
        this.loadInitialData();
    }

    initializeEventListeners() {
        document.getElementById('loadDataBtn').addEventListener('click', () => {
            this.loadStockData();
        });

        document.getElementById('predictBtn').addEventListener('click', () => {
            this.makePrediction();
        });

        document.getElementById('trainBtn').addEventListener('click', () => {
            this.trainModel();
        });

        document.getElementById('symbolInput').addEventListener('keypress', (e) => {
            if (e.key === 'Enter') {
                this.loadStockData();
            }
        });
    }

    async loadInitialData() {
        try {
            await this.loadStockData();
            await this.checkModelStatus();
        } catch (error) {
            console.error('Failed to load initial data:', error);
            this.showError('Failed to load initial data');
        }
    }

    async loadStockData() {
        const symbol = document.getElementById('symbolInput').value.toUpperCase();
        if (!symbol) return;

        this.currentSymbol = symbol;
        this.showLoading('Loading stock data...');

        try {
            const result = await this.api.getStockData(symbol, '1y');
            this.currentData = result.data;

            // Update charts
            this.charts.createPriceChart(this.currentData);
            this.charts.createCandlestickChart(this.currentData);
            this.charts.createVolumeChart(this.currentData);

            // Update stats
            const latestData = this.currentData[this.currentData.length - 1];
            document.getElementById('currentPrice').textContent = `$${latestData.close.toFixed(2)}`;
            document.getElementById('lastUpdated').textContent = new Date().toLocaleString();

            this.hideLoading();
        } catch (error) {
            console.error('Error loading stock data:', error);
            this.showError('Failed to load stock data');
        }
    }

    async makePrediction() {
        if (!this.currentSymbol) return;

        this.showLoading('Making predictions...');

        try {
            const result = await this.api.predict(this.currentSymbol, 5);
            
            // Update prediction chart
            this.charts.updatePredictionChart(result.predictions, result.last_actual_price);

            // Update stats
            const firstPrediction = result.predictions[0];
            const lastPrediction = result.predictions[result.predictions.length - 1];
            const change = ((lastPrediction.predicted_price - result.last_actual_price) / result.last_actual_price) * 100;
            
            document.getElementById('predictedChange').textContent = `${change > 0 ? '+' : ''}${change.toFixed(2)}%`;
            document.getElementById('predictedChange').className = change > 0 ? 'positive' : 'negative';
            document.getElementById('confidence').textContent = result.prediction_confidence;

            this.hideLoading();
        } catch (error) {
            console.error('Error making prediction:', error);
            this.showError('Failed to make prediction. Train the model first.');
        }
    }

    async trainModel() {
        if (!this.currentSymbol) return;

        this.showLoading('Training model... This may take a while.');

        try {
            const result = await this.api.trainModel(this.currentSymbol, '2y');
            
            // Show training results
            console.log('Training completed:', result);
            const bestR2 = Math.max(...Object.values(result.model_performance).map(m => m.r2));
            alert(`Model trained successfully!\nTraining samples: ${result.training_samples}\nBest model R²: ${bestR2.toFixed(4)}`);

            this.hideLoading();
        } catch (error) {
            console.error('Error training model:', error);
            this.showError('Failed to train model');
        }
    }

    async checkModelStatus() {
        try {
            const health = await this.api.healthCheck();
            const statusEl = document.createElement('div');
            statusEl.className = 'status';
            statusEl.innerHTML = `Model Status: ${health.model_loaded ? '✅ Loaded' : '❌ Not trained'}`;
            document.querySelector('.controls').appendChild(statusEl);
        } catch (error) {
            console.error('Error checking model status:', error);
        }
    }

    showLoading(message) {
        const loadingEl = document.getElementById('loading') || document.createElement('div');
        loadingEl.id = 'loading';
        loadingEl.className = 'loading';
        loadingEl.textContent = message;
        document.body.appendChild(loadingEl);
    }

    hideLoading() {
        const loadingEl = document.getElementById('loading');
        if (loadingEl) loadingEl.remove();
    }

    showError(message) {
        this.hideLoading();
        alert(`Error: ${message}`);
    }
}

// Initialize dashboard when page loads
document.addEventListener('DOMContentLoaded', () => {
    new StockDashboard();
});

CSS Styling

/* web/style.css */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
    color: #333;
}

.container {
    max-width: 1400px;
    margin: 0 auto;
    padding: 20px;
}

header {
    background: white;
    border-radius: 12px;
    padding: 24px;
    margin-bottom: 24px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

header h1 {
    font-size: 2.5rem;
    margin-bottom: 16px;
    background: linear-gradient(45deg, #667eea, #764ba2);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}

.controls {
    display: flex;
    gap: 12px;
    align-items: center;
    flex-wrap: wrap;
}

.controls input {
    padding: 12px 16px;
    border: 2px solid #e1e5e9;
    border-radius: 8px;
    font-size: 14px;
    min-width: 200px;
}

.controls button {
    padding: 12px 24px;
    background: linear-gradient(45deg, #667eea, #764ba2);
    color: white;
    border: none;
    border-radius: 8px;
    font-weight: 600;
    cursor: pointer;
    transition: transform 0.2s;
}

.controls button:hover {
    transform: translateY(-2px);
}

.status {
    margin-left: auto;
    padding: 8px 16px;
    background: #f8f9fa;
    border-radius: 20px;
    font-size: 14px;
}

.grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(600px, 1fr));
    gap: 24px;
    margin-bottom: 24px;
}

.card {
    background: white;
    border-radius: 12px;
    padding: 24px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.card h2 {
    margin-bottom: 16px;
    color: #2c3e50;
    font-size: 1.25rem;
}

.stats {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 16px;
}

.stat-card {
    background: white;
    border-radius: 8px;
    padding: 20px;
    text-align: center;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

.stat-card h3 {
    font-size: 14px;
    color: #666;
    margin-bottom: 8px;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.stat-card div {
    font-size: 24px;
    font-weight: bold;
    color: #2c3e50;
}

.positive {
    color: #4CAF50 !important;
}

.negative {
    color: #F44336 !important;
}

.loading {
    position: fixed;
    top: 50%;
    left: 50%;
    transform: translate(-50%, -50%);
    background: rgba(0, 0, 0, 0.8);
    color: white;
    padding: 20px 40px;
    border-radius: 8px;
    font-size: 16px;
    z-index: 1000;
}

@media (max-width: 768px) {
    .grid {
        grid-template-columns: 1fr;
    }
    
    .controls {
        flex-direction: column;
        align-items: stretch;
    }
    
    .controls input,
    .controls button {
        width: 100%;
    }
}

Data Collection Scripts

Fetch Data Script

# scripts/fetch_data.py
import os
import sys
from pathlib import Path

# Make the project root importable when run as "python scripts/fetch_data.py"
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from src.models.data_processor import StockDataProcessor

def fetch_stock_data(symbols, period="2y"):
    """Fetch data for multiple stock symbols"""
    os.makedirs("data/raw", exist_ok=True)
    
    for symbol in symbols:
        print(f"Fetching data for {symbol}...")
        try:
            processor = StockDataProcessor(symbol, period)
            data = processor.fetch_data()
            
            # Save raw data
            data.to_csv(f"data/raw/{symbol}_{period}.csv")
            print(f"✅ Saved data for {symbol}")
            
        except Exception as e:
            print(f"❌ Error fetching {symbol}: {e}")

if __name__ == "__main__":
    # Popular stocks to fetch
    symbols = ["AAPL", "GOOGL", "MSFT", "AMZN", "TSLA", "NVDA", "META"]
    fetch_stock_data(symbols)

Training Script

# scripts/train_model.py
import os
import sys
from pathlib import Path

# Make the project root importable when run as "python scripts/train_model.py"
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from sklearn.model_selection import train_test_split
from src.models.data_processor import StockDataProcessor
from src.models.predictor import StockPredictor

def train_on_symbol(symbol, period="2y"):
    """Train model on a specific symbol"""
    print(f"Training model on {symbol}...")
    
    # Prepare data
    processor = StockDataProcessor(symbol, period)
    X, y, dates = processor.prepare_features()
    
    if len(X) < 100:
        print(f"❌ Insufficient data for {symbol}")
        return None
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )
    
    # Train model
    predictor = StockPredictor()
    results = predictor.train(X_train, y_train, X_test, y_test)
    
    # Save model
    os.makedirs("data/models", exist_ok=True)
    predictor.save_model(f"data/models/{symbol}_model.pkl")
    processor.save_scaler(f"data/models/{symbol}_scaler.pkl")
    
    return results

if __name__ == "__main__":
    symbol = "AAPL"  # Default symbol
    results = train_on_symbol(symbol)
    
    if results:
        print("\n🎉 Training completed!")
        for name, metrics in results.items():
            print(f"{name}: R² = {metrics['r2']:.4f}")

Jupyter Notebook

# notebooks/exploration.ipynb
"""
Stock Analysis Exploration
=========================

This notebook demonstrates exploratory data analysis
for the stock prediction project.
"""

import sys
sys.path.insert(0, "..")  # project root, so src/ is importable from notebooks/

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from src.models.data_processor import StockDataProcessor

# Set up plotting
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Fetch data
processor = StockDataProcessor("AAPL", "2y")
data = processor.fetch_data()

# Basic statistics
print("Dataset Info:")
print(f"Shape: {data.shape}")
print(f"Date range: {data.index.min()} to {data.index.max()}")
print(f"Missing values: {data.isnull().sum().sum()}")

# Price trends
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Price over time
axes[0, 0].plot(data.index, data['Close'])
axes[0, 0].set_title('Apple Stock Price Over Time')
axes[0, 0].set_ylabel('Price ($)')

# Volume over time
axes[0, 1].plot(data.index, data['Volume'], color='orange')
axes[0, 1].set_title('Trading Volume Over Time')
axes[0, 1].set_ylabel('Volume')

# Price distribution
axes[1, 0].hist(data['Close'], bins=50, alpha=0.7)
axes[1, 0].set_title('Price Distribution')
axes[1, 0].set_xlabel('Price ($)')

# Returns distribution
returns = data['Close'].pct_change().dropna()
axes[1, 1].hist(returns, bins=50, alpha=0.7, color='green')
axes[1, 1].set_title('Daily Returns Distribution')
axes[1, 1].set_xlabel('Return')

plt.tight_layout()
plt.show()

# Correlation analysis
features = ['Open', 'High', 'Low', 'Close', 'Volume']
correlation_matrix = data[features].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Feature Correlation Matrix')
plt.show()

Running the Project

Development Mode

# Install dependencies
ppm install

# Start development servers
ppm run dev

# Or start individually
ppm run api    # Backend API (port 8000)
ppm run web    # Frontend (port 3000)

Data Pipeline

# Fetch fresh data
ppm run fetch-data

# Train the model
ppm run train-model

# Run complete pipeline
ppm run pipeline

Analysis

# Start Jupyter Lab
ppm run notebook

# Run analysis script
ppm run analyze

Deployment

Docker Setup

# Dockerfile
FROM python:3.11-slim

# Install curl and Node.js (the slim base image does not ship curl)
RUN apt-get update && apt-get install -y curl \
    && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
    && apt-get install -y nodejs \
    && rm -rf /var/lib/apt/lists/*

# Install PPM
RUN curl -fsSL https://install.ppm.dev/install.sh | sh

WORKDIR /app

# Copy project files
COPY project.toml package.json ./
RUN ppm install

COPY . .

# Build frontend
RUN ppm run build

# Expose ports
EXPOSE 8000 3000

# Start application
CMD ["ppm", "run", "dev"]
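
Building and running the image locally (the image name is arbitrary):

docker build -t stock-predictor .
docker run -p 8000:8000 -p 3000:3000 stock-predictor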

Production Deployment

# Build production assets
ppm run build

# Start production server
ppm run start --env production

This comprehensive data science project demonstrates how PPM seamlessly manages both Python data science libraries and JavaScript visualization tools in a single, unified workflow. The ML backend processes financial data and makes predictions, while the interactive frontend provides real-time visualization and user interaction.