Module 3.2: Causal Prediction & Forecasting
What You'll Learn
- Why causal features make better predictions
- Using tigramite's Prediction class for forecasting
- Causal feature selection
- Comparing causal vs. non-causal predictions
Why Causal Prediction?
Standard ML uses ALL correlated features. But:
- Spurious correlations break under distribution shift
- Using effects as predictors can hurt generalization
- Causal features are more robust
Key insight: Using only CAUSAL predictors often beats using ALL predictors!
Setup: Create Prediction Scenario
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from tigramite import data_processing as pp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.parcorr import ParCorr
from tigramite.models import Prediction
from tigramite.toymodels import structural_causal_processes as toys
# Create a system where we want to predict Y
np.random.seed(42)
def lin_f(x): return x
# X0, X1 are TRUE causes of X2 (our target)
# X3 is correlated with X2 but NOT a cause (effect of X2!)
links = {
    0: [((0, -1), 0.7, lin_f)],
    1: [((1, -1), 0.6, lin_f)],
    2: [((2, -1), 0.3, lin_f), ((0, -1), 0.5, lin_f), ((1, -2), 0.4, lin_f)],  # Target
    3: [((3, -1), 0.5, lin_f), ((2, -1), 0.6, lin_f)],  # X3 is an EFFECT of X2!
}
T = 1000
data, _ = toys.structural_causal_process(links, T=T, seed=42)
var_names = ['Feature_A', 'Feature_B', 'Target', 'Spurious']
# Prediction scenario:
# - Feature_A (X0) CAUSES Target (X2) at lag 1
# - Feature_B (X1) CAUSES Target (X2) at lag 2
# - Spurious (X3) is CORRELATED but NOT a cause!
# - (Spurious is actually CAUSED BY Target)
The Spurious Correlation Trap
# Check correlations - Spurious looks predictive!
print("Correlations with Target:")
for i, name in enumerate(var_names):
    if i != 2:  # Skip the target itself
        # Lag-1 cross-correlation: x(t-1) vs. Target(t)
        corr = np.corrcoef(data[:-1, i], data[1:, 2])[0, 1]
        print(f"  {name}: {corr:.3f}")
# Spurious has HIGH correlation but is NOT a valid predictor!
Warning: Spurious shows high correlation with Target, but using it as a predictor would be wrong - it's an EFFECT, not a cause!
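To confirm that this correlation is pure reverse causation, here is a minimal sketch (the links_no_feedback variant is our illustrative addition, not part of the module's setup): regenerate the same system without the Target → Spurious link and recompute the correlation. It should collapse toward zero.
# Sketch: regenerate the system WITHOUT the Target -> Spurious link
links_no_feedback = dict(links)
links_no_feedback[3] = [((3, -1), 0.5, lin_f)]  # drop ((2, -1), 0.6, lin_f)
data_nf, _ = toys.structural_causal_process(links_no_feedback, T=T, seed=42)
# Same lag-1 cross-correlation as above, now on the feedback-free data
corr_nf = np.corrcoef(data_nf[:-1, 3], data_nf[1:, 2])[0, 1]
print(f"Corr(Spurious(t-1), Target(t)) without feedback: {corr_nf:.3f}")
# The correlation vanishes: it existed only because Target drives Spurious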
Step 1: Discover Causal Parents
# First, discover the causal structure
dataframe = pp.DataFrame(data, var_names=var_names)
parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr, verbosity=0)
results = pcmci.run_pcmciplus(tau_max=5, pc_alpha=0.05)
# Find causal parents of Target (index 2)
target_idx = 2
print("Discovered causal parents of Target:")
for i in range(data.shape[1]):
    for tau in range(6):
        # graph[i, j, tau] == '-->' means X_i(t-tau) causes X_j(t)
        if results['graph'][i, target_idx, tau] == '-->':
            val = results['val_matrix'][i, target_idx, tau]
            print(f"  {var_names[i]}(t-{tau}) → Target: strength={val:.3f}")
Step 2: Compare Prediction Approaches
# Split data into train/test
train_end = 800
# Approach 1: Use ALL features (standard ML)
def create_features_all(data, target_idx=2, max_lag=5):
    """Create lagged features from ALL variables (standard ML approach)."""
    features = []
    for lag in range(1, max_lag + 1):
        for var in range(data.shape[1]):
            # Values of variable `var` at time t-lag, aligned with y at time t
            features.append(data[max_lag - lag:-lag, var])
    X = np.column_stack(features)  # shape: (T - max_lag, n_vars * max_lag)
    y = data[max_lag:, target_idx]
    return X, y
# Approach 2: Use only CAUSAL features
def create_features_causal(data, target_idx=2):
    """Create features from only the causal parents of the target."""
    # Based on the discovered graph: Target(t-1), Feature_A(t-1), Feature_B(t-2)
    X = np.column_stack([
        data[1:-1, target_idx],  # Target at lag 1 (autodependency)
        data[1:-1, 0],           # Feature_A at lag 1
        data[:-2, 1],            # Feature_B at lag 2
    ])
    y = data[2:, target_idx]
    return X, y
# Train and evaluate
X_all, y_all = create_features_all(data)
X_causal, y_causal = create_features_causal(data)
# All-features model (rows of X_all start at t = max_lag = 5,
# so sample index train_end - 5 corresponds to time train_end)
model_all = LinearRegression()
model_all.fit(X_all[:train_end - 5], y_all[:train_end - 5])
pred_all = model_all.predict(X_all[train_end - 5:])
mse_all = mean_squared_error(y_all[train_end - 5:], pred_all)
# Causal-features model (rows of X_causal start at t = 2)
model_causal = LinearRegression()
model_causal.fit(X_causal[:train_end - 2], y_causal[:train_end - 2])
pred_causal = model_causal.predict(X_causal[train_end - 2:])
mse_causal = mean_squared_error(y_causal[train_end - 2:], pred_causal)
print("Prediction Results:")
print(f" All features (20 total): MSE = {mse_all:.4f}")
print(f" Causal features only (2): MSE = {mse_causal:.4f}")
print(f"\nCausal features use 10% of variables but similar/better performance!")
Using Tigramite's Prediction Class
# Extract causal parents from the graph
def get_causal_parents(results, target_idx):
    """Extract the causal parents of target_idx from PCMCI results."""
    parents = {target_idx: []}
    graph = results['graph']
    for i in range(graph.shape[0]):
        # Start at tau=1: contemporaneous links cannot be used to forecast
        for tau in range(1, graph.shape[2]):
            # graph[i, j, tau] == '-->' means X_i(t-tau) causes X_j(t)
            if graph[i, target_idx, tau] == '-->':
                parents[target_idx].append((i, -tau))
    return parents
parents = get_causal_parents(results, target_idx=2)
print(f"Causal parents of Target: {parents}")
# Use the Prediction class (tigramite.models) for causal forecasting
pred = Prediction(
    dataframe=dataframe,
    prediction_model=LinearRegression(),
    data_transform=None,
    train_indices=range(train_end),
    test_indices=range(train_end, T),
    verbosity=0
)
# Fit on the training range using the causal parents discovered above
pred.fit(
    target_predictors=parents,
    selected_targets=[2],  # Predict Target
    tau_max=5
)
# Predict the held-out test range
predicted = pred.predict(2)
print(f"Model fitted with {len(parents[2])} causal parent(s)")
print(f"Predicted {len(predicted)} test-range values")
Key Takeaways
- Causal features = causes of the target (not just correlations)
- Fewer features, similar/better performance - causal selection is efficient
- More robust - causal predictors work under distribution shift
- Avoid reverse causation - don't use effects of the target as predictors!
- Use PCMCI first to identify which variables are valid predictors