import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from datetime import datetime

from alpha_vantage.timeseries import TimeSeries
import mplfinance as mpf

from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score
from skforecast.recursive import ForecasterRecursive
from skforecast.model_selection import backtesting_forecaster, TimeSeriesFold, grid_search_forecaster

# Global seaborn appearance for every figure below: "paper" scaling on a
# dark background.
sns.set_context("paper")
sns.set_style("dark")

# Record when the notebook was executed. datetime.now() is the machine's
# naive local time; the "UTC-1" label is a fixed string, not a computed offset.
ahora = datetime.now()
print("hora maquina UTC-1: ", ahora)

hora maquina UTC-1:  2026-04-02 23:25:05.670608

import os

# The Alpha Vantage key is a credential, so it is read from the environment
# instead of being committed with the notebook.
api_key = os.environ.get("ALPHAVANTAGE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Define la variable de entorno ALPHAVANTAGE_API_KEY "
        "con tu clave de Alpha Vantage."
    )

# Download the daily MSFT series as a pandas DataFrame ('compact' output size).
ts = TimeSeries(key=api_key, output_format='pandas')
df, meta_data = ts.get_daily(symbol='MSFT', outputsize='compact')

# Strip the numeric prefixes from the provider's column names.
df = df.rename(columns={
    '1. open': 'open',
    '2. high': 'high',
    '3. low': 'low',
    '4. close': 'close',
    '5. volume': 'volume',
})

# Put the rows in ascending date order for the time-series steps that follow.
df = df.sort_index()
df.tail()

# Line chart of the closing price over the downloaded window.
cierre = df['close']
sns.lineplot(x=cierre.index, y=cierre)
plt.title("Precio de cierre MSFT - Últimos 100 días")
plt.xticks(rotation=45);

# Daily simple returns of the close. fill_method=None means missing prices are
# not filled before differencing; the first row has no previous value, so it
# is NaN.
df['returns_close'] = df['close'].pct_change(fill_method=None)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: distribution of daily returns (NaNs dropped) with a KDE overlay.
sns.histplot(df['returns_close'].dropna(), kde=True, ax=axes[0])
axes[0].set_title("Distribución de retornos diarios")

# Right panel: returns over time. The title previously said "Precio de cierre",
# copied from the price chart, although this axis shows returns.
sns.lineplot(x=df.index, y=df['returns_close'], ax=axes[1])
axes[1].set_title("Retornos diarios MSFT - Últimos 100 días")
axes[1].tick_params(axis='x', rotation=90)

plt.tight_layout();

# Closing price grouped by weekday name, to look for day-of-week effects.
dias_habiles = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
df['Weekday'] = df.index.day_name()

sns.boxplot(data=df, x='Weekday', y='close', hue="Weekday", order=dias_habiles)
plt.title("Estacionalidad: Precio por día de la semana");

# Closing price grouped by calendar month number (taken from the date index).
df['month'] = df.index.month

sns.set_style("dark")
sns.boxplot(data=df, x='month', y='close', hue="month")
plt.title("Distribución de precios por mes");

# Autocorrelation (left) and partial autocorrelation (right) of the closing
# price, 30 lags each.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
eje_acf, eje_pacf = axes

plot_acf(
    df['close'], lags=30, ax=eje_acf,
    title="Gráfico de Autocorrelación de Close",
)
plot_pacf(
    df['close'], lags=30, ax=eje_pacf,
    title="Gráfico de Autocorrelación Parcial de Close",
)

plt.tight_layout();

# Same two correlograms for the daily returns. NaNs are dropped once and the
# cleaned series feeds both plots.
retornos = df['returns_close'].dropna()

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for trazar, eje, titulo in zip(
    (plot_acf, plot_pacf),
    axes,
    (
        "Gráfico de Autocorrelación de Returns Close",
        "Gráfico de Autocorrelación Parcial de Returns Close",
    ),
):
    trazar(retornos, lags=30, ax=eje, title=titulo)

plt.tight_layout();

# 20-row rolling mean and standard deviation of the close, stored on df so
# later cells can reuse them.
ventana_20 = df['close'].rolling(window=20)
df['rolling_mean'] = ventana_20.mean()
df['rolling_std'] = ventana_20.std()

# Overlay the price and both rolling statistics on one axis.
plt.figure(figsize=(10,5))
for columna, etiqueta in (
    ('close', 'Close'),
    ('rolling_mean', 'Media móvil 20d'),
    ('rolling_std', 'Volatilidad 20d'),
):
    sns.lineplot(x=df.index, y=df[columna], label=etiqueta)
plt.title("Rolling mean & std (20 días)")
plt.legend();

# Bands at one rolling standard deviation above and below the 20-row mean.
media_movil = df['rolling_mean']
desviacion = df['rolling_std']
upper_band = media_movil + desviacion
lower_band = media_movil - desviacion

# Both bands are drawn in blue as extra plots on the candlestick chart.
ap = [mpf.make_addplot(banda, color='blue') for banda in (upper_band, lower_band)]

# Candlestick chart with a volume panel and a 20-period moving average.
mpf.plot(df, type='candle', volume=True, mav=20, figscale=1.7, addplot=ap)

# Augmented Dickey-Fuller test on the returns (NaNs dropped); element 1 of the
# result is what this cell reports as the p-value.
resultado_adf = adfuller(df['returns_close'].dropna())
print('p-value:', resultado_adf[1])

p-value: 1.2877454804368569e-15

# Decompose the close with model='mul' and a period of 50 rows, then enlarge
# the resulting figure so the panels are readable.
descomposicion = seasonal_decompose(df['close'], model='mul', period=50)
figura_descomposicion = descomposicion.plot()
figura_descomposicion.set_size_inches(18,8);

# Forecast length (in steps of the series frequency).
horizon = 7

# Target series: the close, indexed by datetime, re-sampled onto a business-day
# calendar. Dates introduced by asfreq('B') are forward-filled with the last
# known close.
serie = df['close']
serie.index = pd.to_datetime(serie.index)
serie = serie.asfreq('B').ffill()

# The last 50 observations are held out for backtesting.
initial_train_size = len(serie) - 50

# Candidate regressors, keyed by the short name used in the results table.
modelos = {
    "linreg": LinearRegression(),
    "ridge": Ridge(random_state=42),
    "lasso": Lasso(random_state=42),
    "enet": ElasticNet(random_state=42),
    "rf": RandomForestRegressor(random_state=42, n_jobs=-1),
    "gb": GradientBoostingRegressor(random_state=42),
}

# One backtesting scheme shared by every candidate: train on the initial
# window, then predict `horizon` steps per fold without refitting.
cv_esquema = TimeSeriesFold(
    steps=horizon,
    initial_train_size=initial_train_size,
    refit=False,
)

# Error measures reported for each model, in column order.
metricas = {
    "RMSE": root_mean_squared_error,
    "MAE": mean_absolute_error,
    "R2": r2_score,
}

resultados = {}
for name, modelo in modelos.items():
    # Recursive forecaster using the previous 20 values as predictors.
    forecaster = ForecasterRecursive(estimator=modelo, lags=20)

    # Only the backtest predictions are kept; the metric returned by
    # backtesting_forecaster is discarded and recomputed below.
    _, preds = backtesting_forecaster(
        forecaster=forecaster,
        y=serie,
        cv=cv_esquema,
        metric='mean_squared_error',
        verbose=False,
        show_progress=False,
    )

    # Align the observed values with the predicted dates before scoring.
    y_real = serie.loc[preds.index]
    y_pred = preds['pred']
    resultados[name] = {
        etiqueta: calcular(y_real, y_pred)
        for etiqueta, calcular in metricas.items()
    }

# One row per model, best (lowest RMSE) first.
df_resultados = pd.DataFrame.from_dict(resultados, orient='index').sort_values("RMSE")
print("\n--- Ranking de Modelos (Ordenados por RMSE) ---")
print(df_resultados)

--- Ranking de Modelos (Ordenados por RMSE) ---
              RMSE        MAE         R2
rf       56.287006  53.138154  -3.409714
gb       57.103714  53.913524  -3.538609
lasso    98.571568  78.411293 -12.523759
linreg  102.700492  80.736784 -13.680442
ridge   102.908721  80.917502 -13.740032
enet    103.968726  82.344896 -14.045254

# Hyper-parameter grids, keyed by the same short names as `modelos`. Every
# candidate model needs an entry here: the grid is looked up by whichever name
# ranks first, and a missing key (previously 'linreg') raised KeyError.
param_grid = {
    'linreg': {
        'fit_intercept': [True, False]
    },

    'ridge': {
        'alpha': [0.01, 0.1, 1.0, 10.0, 50.0], 'fit_intercept': [True, False],
        'solver': ['auto', 'svd', 'cholesky', 'lsqr']
    },

    'lasso': {
        'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0], 'fit_intercept': [True, False],
        'selection': ['cyclic', 'random']
    },

    'enet': {
        'alpha': [0.0001, 0.001, 0.01, 0.1], 'l1_ratio': [0.1, 0.3, 0.5, 0.7, 0.9],
        'fit_intercept': [True, False], 'selection': ['cyclic', 'random']
    },

    'rf': {
        'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 10, None],
        'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2', None]
    },

    'gb': {
        'n_estimators': [50, 100, 200], 'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [2, 3, 4], 'subsample': [0.6, 0.8, 1.0], 'min_samples_split': [2, 5, 10]
    }
}

# The ranking table is sorted by RMSE, so its first index label is the winner.
top_model_name = df_resultados.index[0]
print(f"\n--- Optimizando {top_model_name} ---")

forecaster_best = ForecasterRecursive(
    estimator = modelos[top_model_name],
    lags = 20
    )

# Search the winner's grid with the same backtesting scheme used for the
# ranking. return_best=True is passed so the forecaster ends up configured
# with the best combination found.
resultado_grid = grid_search_forecaster(
    forecaster = forecaster_best,
    y = serie,
    param_grid = param_grid[top_model_name],
    cv = cv_esquema,
    metric = 'mean_squared_error',
    return_best = True,
    verbose = False,
    show_progress = False,
)

# Take the first row by position rather than by the label 0, so the value does
# not depend on how the results frame is indexed. The metric is MSE, hence the
# square root.
mejor_mse = resultado_grid["mean_squared_error"].iloc[0]
print("RMSE con Grid: ", np.sqrt(mejor_mse))

--- Optimizando rf ---

`Forecaster` refitted using the best-found lags and parameters, and the whole data set: 
  Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20] 
  Parameters: {'max_depth': 5, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
  Backtesting metric: 3073.8841189928107
RMSE con Grid:  55.44262005887538

# Render plotly figures inline in the notebook.
pio.renderers.default = "notebook_connected"

# Fit on the full series and forecast `horizon` steps past its end.
forecaster_best.fit(y=serie)
predicciones = forecaster_best.predict(steps=horizon)

# Show only the last 30 observations for context.
ultimos_30 = serie.iloc[-30:]
historico = go.Scatter(
    x=ultimos_30.index, y=ultimos_30,
    mode='lines+markers', name="Histórico (Últimos 30 días)"
)

pred = go.Scatter(
    x=predicciones.index, y=predicciones,
    mode='lines+markers', name="Predicción a Futuro",
    line=dict(color='red'), marker=dict(size=8)
)

fig = go.Figure(data=[historico, pred])

# The title and the printed message take the step count from `horizon`, so
# they stay correct if the forecast length changes (it was hard-coded as 7).
fig.update_layout(
    title=f"MSFT: Predicción a {horizon} días con modelo {top_model_name.upper()}",
    xaxis_title="Fecha", yaxis_title="Precio de Cierre",
    template="plotly", legend=dict(x=1, y=1),
)

fig.show()

print(f"Las predicciones para los próximos {horizon} días son\n {predicciones.round(3)}")

Las predicciones para los próximos 7 días son
 2026-04-03    372.222
2026-04-06    372.113
2026-04-07    372.103
2026-04-08    372.369
2026-04-09    371.935
2026-04-10    371.251
2026-04-13    372.149
Freq: B, Name: pred, dtype: float64

	open	high	low	close	volume
date
2026-03-27	361.900	362.45	356.51	356.77	37883400.0
2026-03-30	361.895	365.36	356.28	358.96	44797002.0
2026-03-31	364.550	372.90	363.07	370.17	45244365.0
2026-04-01	373.490	373.99	368.20	369.37	29417206.0
2026-04-02	367.205	373.64	364.15	373.46	23912064.0

Análisis de MSFT

Cargar librerías

Obtener datos

Gráficos de Autocorrelación en Close y Returns Close

Preprocesamiento

Modelado