# Define the simulation function
#
# Generative simulation in which age drives height, and both age and height
# drive weight.
#
# Args:
#   age: Numeric vector of ages (years).
#
# Returns:
#   A data.frame with one row per element of `age` and the columns
#   `age`, `height` and `weight`.
simulate_growth <- function(age) {
  stopifnot(is.numeric(age))
  n <- length(age)

  # Simulate height (in feet)
  height <- rnorm(
    n,
    mean = 1.5 + 0.3 * age + 0.005 * age^2, # Quadratic growth pattern
    sd = 0.3                                # Random variation
  )

  # Simulate weight (in lb); height is drawn first because weight depends on it
  weight <- rnorm(
    n,
    mean = 6.5 + 5 * age + 0.25 * height, # Linear relationship with age and height
    sd = 4                                # Random variation
  )

  data.frame(age = age, height = height, weight = weight)
}
# Generate data for ages 0-12
ages <- runif(100, 0, 12)
sim_data <- simulate_growth(ages)

# Plot the simulated relationships
# Stack the two panels vertically; keep the previous graphics settings so
# they can be restored once both plots are drawn.
old_par <- par(mfrow = c(2, 1))
plot(sim_data$age, sim_data$height,
     xlab = "Age (years)", ylab = "Height (feet)",
     main = "Age vs Height")
plot(sim_data$age, sim_data$weight,
     xlab = "Age (years)", ylab = "Weight (lb)",
     main = "Age vs Weight")
par(old_par)
Problem set 2
Due by 11:59 PM on Monday, April 14, 2025
Instructions
Please use an RMarkdown file to complete this assignment. Make sure you reserve code chunks for code and write out any interpretations or explanations outside of code chunks. Submit the knitted PDF file containing your code and written answers on Canvas.
Questions
From the `Howell1` dataset, consider only people younger than 13 years old. Estimate the causal association between age and weight. Assume that age influences weight through two paths. First, age influences height, and height influences weight. Second, age directly influences weight through age-related changes in muscle growth and body proportions.
- Write a generative simulation that takes age as an input and simulates height and weight.
Click to see the answer
This simulation:
- Takes age as input and generates both height and weight
- Uses a quadratic function for height to capture the non-linear growth pattern
- Uses a linear function for weight that depends on both age and height
- Includes random variation (noise) in both height and weight
- The plots show the expected relationships: both height and weight increase with age
- Write out a mathematical model to estimate the linear relationship between age and weight. (Just age, not height!)
Click to see the answer
There are many priors that could be appropriate here. Don’t worry if yours looks different from mine. Here’s just one example:
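$$
\begin{aligned}
W_i &\sim \text{Normal}(\mu_i, \sigma) \\
\mu_i &= \alpha + \beta A_i \\
\alpha &\sim \text{Normal}(7, 1.5) \\
\beta &\sim \text{Normal}(0, 1) \\
\sigma &\sim \text{Exponential}(1)
\end{aligned}
$$

Where:

- $W_i$ is the weight of individual $i$
- $A_i$ is the age of individual $i$
- $\alpha$ is the intercept
- $\beta$ is the effect of age on weight
- $\sigma$ is the standard deviation of the weight distribution

This model:
- Assumes weight is normally distributed around a mean
- The mean $\mu_i$ is a linear function of age
- Uses weakly informative priors for all parameters
- The exponential prior on $\sigma$ ensures it’s positive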
- Fit this model using `brms`. Create two plots: one of regression lines implied by your prior and one of the regression lines implied by the posterior.
Click to see the answer
# Load required packages
library(brms)
library(tidyverse)
library(cowplot)
library(patchwork)
data("Howell1", package = "rethinking")

# Filter for children under 13
d <- Howell1[Howell1$age < 13, ]

# Priors shared by the prior-only and the posterior fit. Defining them once
# guarantees both plots are based on exactly the same prior specification.
model_priors <- c(
  prior(normal(7, 1.5), class = Intercept),
  prior(normal(0, 1), class = b),
  prior(exponential(1), class = sigma)
)

# Define the model
mp <- brm(
  weight ~ age,
  data = d,
  prior = model_priors,
  sample_prior = "only" # First get prior samples
)

# Get prior samples
prior_samples <- as_draws_df(mp)

# Now fit the model with the data
m <- brm(
  weight ~ age,
  data = d,
  prior = model_priors
)

# Get posterior samples
post_samples <- as_draws_df(m)

# Regression lines implied by the prior (first 50 draws). geom_blank() sets up
# the axes from the data without drawing the points.
p1 <- ggplot(d, aes(x = age, y = weight)) +
  geom_blank() +
  geom_abline(aes(intercept = b_Intercept, slope = b_age),
              data = prior_samples[1:50, ],
              alpha = .2) +
  labs(title = "Lines from prior")

# Regression lines implied by the posterior (first 50 draws), over the data
p2 <- ggplot(d, aes(x = age, y = weight)) +
  geom_point() +
  geom_abline(aes(intercept = b_Intercept, slope = b_age),
              data = post_samples[1:50, ],
              alpha = .2) +
  labs(title = "Lines from posterior")

# Show both panels side by side (patchwork)
(p1 | p2)
The plots show:
- Prior regression lines: These show our uncertainty before seeing the data. The lines are widely spread, reflecting our weak priors.
- Posterior regression lines: These show our uncertainty after seeing the data. The lines are much more concentrated, showing that the data has informed our estimates.
The model summary shows the estimated effects of age and height on weight, along with their uncertainty. The standardized coefficients allow us to compare the relative importance of age and height in predicting weight.