import numpy as np
import pandas as pd

# 1. Subset and rename variables (analogous to select + renaming in R).
#    `ga_ma_full` must already be defined by the time this runs; it is not
#    created here.
sum_vars = ga_ma_full.rename(
    columns={
        "avg_enrollment": "MA Enrollment",
        "avg_eligibles": "MA Eligibles",
        "partc_score": "Star Rating",
        "raw_rating": "Raw Score",
    }
)[["MA Enrollment", "MA Eligibles", "Star Rating", "Raw Score"]]


# 2. Define missing-count function (miss_n).
def miss_n(x):
    """Return the number of missing (NA) entries in *x*.

    Used below via ``DataFrame.apply``, so *x* is one column of
    ``sum_vars`` at a time.
    """
    return x.isna().sum()


# 3. (Optional) simple text histogram for each variable.
def hist_text(x, bins=10):
    """Return the histogram bin counts of *x* as one ``" | "``-joined string.

    Missing values are dropped before binning. ``bins`` is forwarded
    unchanged to ``numpy.histogram``; the bin edges are discarded and only
    the per-bin counts are kept.
    """
    counts, _ = np.histogram(x.dropna(), bins=bins)
    return " | ".join(map(str, counts))


# 4. Build summary table: Mean, SD, missing, and histogram.
#    Each entry is indexed by the column names of `sum_vars`, so the result
#    has one row per variable.
summary = pd.DataFrame(
    {
        "Mean": sum_vars.mean(),
        "SD": sum_vars.std(),
        "Missing N": sum_vars.apply(miss_n),
        # Drop this line if you don't want the histogram column.
        "Histogram": sum_vars.apply(hist_text),
    }
)

# 5. Optional: format to 2 decimal places for Mean and SD.
summary[["Mean", "SD"]] = summary[["Mean", "SD"]].round(2)

# Bare expression: shows the table when run as the last statement of a
# notebook cell / REPL input; has no effect when run as a plain script.
summary