#' Simulate Multi-State Joint Modeling Data
#'
#' @description
#' Generates simulated data for a multi-state chronic disease cohort
#' (CKD/CVD/Diabetes) with three longitudinal biomarkers (eGFR, BNP, HbA1c)
#' and bidirectional transitions between the three disease states. Includes
#' demographic covariates and patient-specific linear biomarker trajectories.
#'
#' @details
#' Patients are simulated one at a time. For each patient the function draws
#' demographics, an entry disease, random intercepts and slopes for the three
#' biomarkers, a set of irregular visit times, and finally a multi-state
#' process in which the waiting time to the next transition is exponential
#' with rate equal to the sum of the transition-specific rates (base rate
#' times a demographic hazard ratio). The destination state is sampled in
#' proportion to those rates.
#'
#' At every visit each biomarker is recorded only if an independent uniform
#' draw exceeds 0.04, so some measurements are missing at random.
#'
#' Every patient whose follow-up is not exhausted by an event ends with a
#' censored interval (\code{status = 0}) that stops at \code{max_followup};
#' this includes patients who reached \code{max_events} transitions.
#'
#' The function calls \code{set.seed(seed)} and therefore changes the state of
#' the global random number generator. The order of random draws is fixed, so
#' a given \code{seed} always reproduces the same data.
#'
#' @param n_patients Integer number of patients (non-negative).
#'   Default \code{500}.
#' @param max_followup Numeric maximum follow-up time in years; must be
#'   positive. Default \code{15}.
#' @param max_events Integer maximum number of events per patient
#'   (non-negative; \code{0} yields one censored interval per patient).
#'   Default \code{3}.
#' @param seed Integer random seed passed to \code{set.seed()}.
#'   Default \code{42}.
#'
#' @return A list with two data frames (both with zero rows when
#'   \code{n_patients = 0}):
#'   \item{long_data}{Longitudinal biomarker measurements with columns
#'     \code{patient_id}, \code{visit_time_years}, \code{biomarker},
#'     \code{value}, \code{unit}}
#'   \item{surv_data}{Survival/transition data with columns
#'     \code{patient_id}, \code{start_time}, \code{stop_time},
#'     \code{status}, \code{event_type}, \code{state_from}, \code{state_to},
#'     \code{transition}, \code{entry_disease}, \code{age_baseline},
#'     \code{sex}, \code{ethnicity}, \code{smoking}, \code{bmi}.
#'     \code{state_to} is \code{NA} on censored rows.}
#'
#' @examples
#' sim <- simulate_jmSurface(n_patients = 200, seed = 123)
#' head(sim$long_data)
#' table(sim$surv_data$transition)
#'
#' @export
simulate_jmSurface <- function(n_patients = 500, max_followup = 15,
                                max_events = 3, seed = 42) {

  stopifnot(
    is.numeric(n_patients), length(n_patients) == 1L, !is.na(n_patients),
    n_patients >= 0,
    is.numeric(max_followup), length(max_followup) == 1L,
    !is.na(max_followup), max_followup > 0,
    is.numeric(max_events), length(max_events) == 1L, !is.na(max_events),
    max_events >= 0
  )

  set.seed(seed)
  diseases <- c("CKD", "CVD", "Diabetes")

  ethnicity_pool <- c("White", "Black/African American", "Hispanic/Latino",
                       "Asian", "Other/Mixed")
  ethnicity_probs <- c(0.42, 0.22, 0.20, 0.10, 0.06)

  ## Baseline transition rates (row = current state, column = next state).
  ## Any pair not listed below keeps the default rate of 0.08.
  base_rate <- matrix(0.08, nrow = 3, ncol = 3,
                      dimnames = list(diseases, diseases))
  base_rate["CKD", "CVD"] <- 0.12
  base_rate["CVD", "CKD"] <- 0.10
  base_rate["Diabetes", "CVD"] <- 0.11
  base_rate["CVD", "Diabetes"] <- 0.09
  base_rate["Diabetes", "CKD"] <- 0.10

  biomarker_units <- c(eGFR = "mL/min/1.73m2", BNP = "pg/mL", HbA1c = "%")
  biomarker_names <- names(biomarker_units)

  ## Scalar "add `amount` when `cond` holds" helper (replaces scalar ifelse).
  bump <- function(cond, amount) if (cond) amount else 0

  long_rows <- vector("list", n_patients)  # one data frame per patient
  surv_rows <- list()                      # one data frame per interval

  for (i in seq_len(n_patients)) {
    ## Demographics
    ## NOTE: the order of random draws below is deliberate; changing it
    ## would change the data produced for a given seed.
    age <- runif(1, 35, 85)
    sex <- sample(0:1, 1)
    ethnicity <- sample(ethnicity_pool, 1, prob = ethnicity_probs)
    smoking <- sample(0:2, 1, prob = c(0.45, 0.30, 0.25))
    is_black <- ethnicity == "Black/African American"

    bmi_sex_shift <- if (sex == 1) -0.5 else 0.5
    bmi <- round(rnorm(1, 28 + bump(is_black, 2) + bmi_sex_shift, 5), 1)
    bmi <- max(bmi, 17)
    hypertension <- rbinom(1, 1, min(0.15 + 0.008 * (age - 40) +
                                       bump(is_black, 0.12), 0.85))
    diabetes_hx <- rbinom(1, 1, min(0.05 + 0.004 * (age - 40) +
                                      bump(bmi > 30, 0.08), 0.60))

    ## Entry disease
    entry_probs <- c(CKD = 0.40, CVD = 0.35, Diabetes = 0.25)
    if (is_black) entry_probs["CKD"] <- 0.52
    if (ethnicity == "Hispanic/Latino") entry_probs["Diabetes"] <- 0.36
    if (sex == 1) entry_probs["CVD"] <- entry_probs["CVD"] + 0.06
    entry_probs <- entry_probs / sum(entry_probs)
    entry <- sample(diseases, 1, prob = entry_probs)

    ## Biomarker parameters (patient-specific intercepts and yearly slopes)
    egfr_int <- rnorm(1, 90 + bump(is_black, 8) - (age - 50) * 0.4 -
                        bump(hypertension == 1, 5), 12)
    egfr_slp <- rnorm(1, -2.5 - bump(diabetes_hx == 1, 0.8), 1.5)

    bnp_int <- rnorm(1, 70 + (age - 50) * 1.5 + bump(sex == 1, 10) +
                       bump(hypertension == 1, 30), 25)
    bnp_slp <- rnorm(1, 5 + bump(age > 70, 2), 3)

    hba1c_int <- rnorm(1, 5.8 + bump(diabetes_hx == 1, 1.5) +
                         bump(bmi > 30, 0.3), 0.7)
    hba1c_slp <- rnorm(1, 0.12 + bump(diabetes_hx == 1, 0.08), 0.10)

    ## Visit times: baseline visit plus exponential gaps (mean 0.5 years).
    ## cumsum() of non-negative gaps is already sorted.
    n_visits <- sample(5:18, 1)
    v_times <- cumsum(c(0, rexp(n_visits - 1, 2)))
    v_times <- v_times[v_times <= max_followup]
    if (length(v_times) < 3) {
      ## Fallback grid, shrunk when follow-up is shorter than one year so
      ## that no visit is placed after max_followup.
      v_times <- c(0, 0.5, 1) * min(1, max_followup)
    }

    ## Generate biomarker observations into preallocated vectors and build
    ## a single data frame per patient.
    n_slots <- length(biomarker_names) * length(v_times)
    obs_time <- numeric(n_slots)
    obs_marker <- character(n_slots)
    obs_value <- numeric(n_slots)
    n_obs <- 0L

    for (vt in v_times) {
      for (marker in biomarker_names) {
        ## Each measurement is kept only if the uniform draw exceeds 0.04.
        if (runif(1) > 0.04) {
          n_obs <- n_obs + 1L
          obs_time[n_obs] <- vt
          obs_marker[n_obs] <- marker
          ## Linear trajectory plus noise, clamped to the marker's range.
          obs_value[n_obs] <- switch(
            marker,
            eGFR = max(5, egfr_int + egfr_slp * vt + rnorm(1, 0, 5)),
            BNP = max(5, bnp_int + bnp_slp * vt + rnorm(1, 0, 15)),
            HbA1c = max(4, min(14, hba1c_int + hba1c_slp * vt +
                                 rnorm(1, 0, 0.3)))
          )
        }
      }
    }

    kept <- seq_len(n_obs)
    long_rows[[i]] <- data.frame(
      patient_id = rep(i, n_obs),
      visit_time_years = round(obs_time[kept], 3),
      biomarker = obs_marker[kept],
      value = round(obs_value[kept], 1),
      unit = unname(biomarker_units[obs_marker[kept]]),
      stringsAsFactors = FALSE)

    ## One survival interval for the current patient. `to = NA` marks a
    ## censored interval; otherwise the interval ends with a transition.
    surv_row <- function(start, stop, from, to = NA_character_) {
      censored <- is.na(to)
      label <- if (censored) "censored" else to
      data.frame(
        patient_id = i,
        start_time = round(start, 3),
        stop_time = round(stop, 3),
        status = if (censored) 0 else 1,
        event_type = label,
        state_from = from,
        state_to = to,
        transition = paste(from, "->", label),
        entry_disease = entry,
        age_baseline = round(age, 1), sex = sex,
        ethnicity = ethnicity, smoking = smoking, bmi = round(bmi, 1),
        stringsAsFactors = FALSE)
    }

    ## Multi-state survival process
    current <- entry
    t_now <- 0
    ev_num <- 0
    demo_hr <- exp(0.01 * (age - 55) + 0.08 * smoking +
                     0.02 * max(bmi - 25, 0) +
                     bump(hypertension == 1, 0.15))

    while (t_now < max_followup && ev_num < max_events) {
      possible <- setdiff(diseases, current)
      rates <- base_rate[current, possible] * demo_hr
      total_rate <- sum(rates)
      t_next <- t_now + rexp(1, total_rate)

      ## The next transition would fall after the end of follow-up.
      if (t_next >= max_followup) break

      next_state <- sample(possible, 1, prob = rates / total_rate)
      ev_num <- ev_num + 1
      surv_rows[[length(surv_rows) + 1L]] <-
        surv_row(t_now, t_next, current, next_state)
      current <- next_state
      t_now <- t_next
    }

    ## Close the record with a censored interval up to max_followup. This
    ## covers both administrative censoring and reaching max_events
    ## (including max_events = 0, where the loop body never runs).
    if (t_now < max_followup) {
      surv_rows[[length(surv_rows) + 1L]] <-
        surv_row(t_now, max_followup, current)
    }
  }

  long_df <- do.call(rbind, long_rows)
  surv_df <- do.call(rbind, surv_rows)

  ## do.call(rbind, list()) yields NULL; return typed empty frames instead.
  if (is.null(long_df)) {
    long_df <- data.frame(
      patient_id = integer(0), visit_time_years = numeric(0),
      biomarker = character(0), value = numeric(0), unit = character(0),
      stringsAsFactors = FALSE)
  }
  if (is.null(surv_df)) {
    surv_df <- data.frame(
      patient_id = integer(0), start_time = numeric(0),
      stop_time = numeric(0), status = numeric(0),
      event_type = character(0), state_from = character(0),
      state_to = character(0), transition = character(0),
      entry_disease = character(0), age_baseline = numeric(0),
      sex = integer(0), ethnicity = character(0), smoking = integer(0),
      bmi = numeric(0), stringsAsFactors = FALSE)
  }

  rownames(long_df) <- NULL
  rownames(surv_df) <- NULL

  list(long_data = long_df, surv_data = surv_df)
}
