SAS Examples

/* Session-wide listing options, followed by a second-level title line. */
OPTIONS
    PAGESIZE=60
    LINESIZE=75
    NODATE
    NOCENTER;
TITLE2 'SAS OPTIONS and TITLE statements';

LINEAR REGRESSION (OLS)
/* Plain OLS fit of korean on gnp, rate and sex. */
PROC REG;
    MODEL korean = gnp rate sex;

/* Dummy-variable regression; xd stands for the product x*d. */
PROC REG;
    MODEL y = x d xd;

/* F test of the complete model against the reduced one (x2 = 0 and x3 = 0). */
PROC REG;
    MODEL y = x1-x3;
    TEST x2 = 0, x3 = 0;

/* Restricted model: the x2 coefficient is constrained to zero. */
PROC REG;
    MODEL y = x1-x3;
    RESTRICT x2 = 0;

/* Model without an intercept term. */
PROC REG;
    MODEL y = x1-x3 / NOINT;

/* Residual test. */
PROC REG;
    MODEL y = x1-x4 / R DW;

/* Multicollinearity diagnostics. */
PROC REG;
    MODEL y = x1-x4 / COLLIN VIF TOL;

/* Confidence intervals, plus a plot of the residuals against x1. */
PROC REG;
    MODEL y = x1-x4 / CLI CLM;
    PLOT r.*x1 / VPLOTS=2 HPLOTS=2;

BINARY RESPONSE MODEL

Binary Logit Regression

/* Binary logit with PROC LOGISTIC. The classification-table option is
   written out in full as CTABLE. */
PROC LOGISTIC DESCENDING;
    MODEL y = x1 x2 / CTABLE INFLUENCE NOINT;

/* Binary logit with PROC GENMOD. The logit link is stated explicitly so the
   step reads as the counterpart of the LINK=PROBIT example. */
PROC GENMOD DATA = binary.car DESC;
    MODEL owncar = budget age male / DIST=BINOMIAL LINK=LOGIT;

/* Logit model using PROC PROBIT. CLASS takes a plain variable list
   (no equals sign). */
PROC PROBIT;
    CLASS y;
    MODEL y = x1 x2 / DIST=LOGISTIC;

/* Logit via PROC QLIM, with the discrete response declared in an
   ENDOGENOUS statement. */
PROC QLIM DATA=masil.student;
    MODEL owncar = budget age male;
    ENDOGENOUS owncar ~ DISCRETE (DIST=LOGIT);

/* Same QLIM logit, with DISCRETE given as a MODEL option instead. */
PROC QLIM DATA=masil.student;
    MODEL owncar = budget age male / DISCRETE (DIST=LOGIT);

/* PROC CATMOD version; budget and age are named in the DIRECT statement. */
PROC CATMOD DATA=binary.car;
    DIRECT budget age;
    MODEL owncar = budget age male / NOPROFILE;

Binary Probit Regression

/* Binary probit with PROC PROBIT. The CLASS statement takes a plain variable
   list, so the response is named without an equals sign. */
PROC PROBIT;
    CLASS y;
    MODEL y = x1 x2;

/* Probit model using PROC LOGISTIC with the NORMIT link. */
PROC LOGISTIC DATA=binary.car DESC;
    MODEL owncar = budget age male / LINK=NORMIT;

/* Probit model using PROC GENMOD. */
PROC GENMOD DATA=binary.car DESC;
    MODEL owncar = budget age male / DIST=BINOMIAL LINK=PROBIT;

/* Probit model using PROC QLIM. */
PROC QLIM DATA=masil.student;
    MODEL owncar = budget age male / DISCRETE (DIST=NORMAL);

Bivariate Logit/Probit Regression

/* Two equations, one MODEL statement each; both responses are declared
   discrete in a single ENDOGENOUS statement. */
PROC QLIM DATA=masil.student;
    MODEL offcamp = budget age male;
    MODEL owncar  = budget age male;
    ENDOGENOUS offcamp owncar ~ DISCRETE(DIST=LOGIT);

/* Compact form: both responses on the left-hand side of one MODEL statement. */
PROC QLIM DATA=masil.student;
    MODEL offcamp owncar = budget age male / DISCRETE;

OTHER LOGIT/PROBIT MODEL

Ordinal Response Regression

/* Ordinal response fitted with PROC LOGISTIC. The MODEL statement is
   terminated with a semicolon so it does not run into the next step. */
PROC LOGISTIC DATA = ordinal.park DESC;
    MODEL park = budget age male;

/* Ordinal response in PROC PROBIT with the logistic distribution. */
PROC PROBIT DATA=ordinal.park;
    CLASS park;
    MODEL park = budget age male / DIST=LOGISTIC;

/* Ordinal response in PROC PROBIT with the normal distribution. */
PROC PROBIT DATA=ordinal.park;
    CLASS park;
    MODEL park = budget age male / DIST=Normal;

/* PROC LOGISTIC with the NORMIT link. */
PROC LOGISTIC DATA=ordinal.park DESC;
    MODEL park = budget age male / LINK=NORMIT;

/* PROC QLIM, DISCRETE with DIST=LOGIT given as a MODEL option. */
PROC QLIM DATA=masil.student;
    MODEL park = budget age male / DISCRETE (DIST=LOGIT);

/* PROC QLIM, DISCRETE declared in an ENDOGENOUS statement without a DIST= value. */
PROC QLIM DATA=masil.student;
    MODEL park = budget age male;
    ENDOGENOUS park ~ DISCRETE;

Multinomial Logit Regression

/* Multinomial logit with PROC CATMOD: all three regressors are listed in
   DIRECT and the response functions are LOGITS. */
PROC CATMOD DATA=nominal.trans;
    DIRECT budget age male;
    RESPONSE LOGITS;
    MODEL mode = budget age male / NOPROFILE;

Nested Multinomial Logit Regression

/* Nested logit with PROC MDC: alternatives 1-3 are identified by mode and
   choice sets are grouped by person. */
PROC MDC DATA=masil.nlogit;
    MODEL decision = time /
          TYPE=NLOGIT
          CHOICE=(mode 1 2 3)
          COVEST=HESS;
    ID person;
    UTILITY U(1,) = time;
    NEST LEVEL(1) = (1 2 @ 1, 3 @ 2),
         LEVEL(2) = (1 2 @ 1);

Conditional Logit Regression

/* Conditional logit with PROC MDC: four alternatives per subject. */
PROC MDC DATA=clogit.travel;
    MODEL choice = walk bike bus time cost / TYPE=CLOGIT NCHOICE=4;
    ID subject;

/* Conditional logit with PROC PHREG, stratified by subject. */
PROC PHREG DATA=clogit.travel2;
    STRATA subject;
    MODEL f_time*choice(0) = walk bike bus time cost;

EVENT COUNT DATA MODEL
/* Poisson Regression */
PROC GENMOD DATA = count.waste;
    /* NOTE(review): y, x1 and x2 are placeholders; substitute the count
       response and regressors actually stored in count.waste. */
    MODEL y = x1 x2 / DIST=POISSON LINK=LOG;

/* Negative Binomial Regression */
PROC GENMOD DATA = count.waste;
    /* NOTE(review): y, x1 and x2 are placeholders; substitute the count
       response and regressors actually stored in count.waste. */
    MODEL y = x1 x2 / DIST=NEGBIN LINK=LOG;

LIMITED DEPENDENT VARIABLE MODEL
/* Censored response with lower bound 0, using PROC QLIM. */
PROC QLIM DATA=tobit;
    MODEL y = age marriage relig occup rate;
    ENDOGENOUS y ~ CENSORED (LB=0);

/* PROC LIFEREG on the same data, with (low, y) as the response pair and a
   normal distribution. */
PROC LIFEREG DATA=tobit;
    MODEL (low, y) = age marriage relig occup rate / DIST=NORMAL;

/* Truncated */
PROC QLIM DATA=masil.student;
    MODEL budget = owncar age male;
    ENDOGENOUS budget ~ TRUNCATED (LB=0 UB=10000);

/* Selected (Heckman model) */
PROC QLIM DATA=js.nes;
    MODEL engagement = interest trust capital religious / SELECT (egov=1);
    MODEL egov = income education knowledge / DISCRETE(DIST=NORMAL);

/* Survival (duration) analysis with PROC LIFEREG: dv is the duration and
   censored(0) marks the value 0 as a censored observation. */
PROC LIFEREG;
    MODEL dv*censored(0) = iv1 iv2 / DISTRIBUTION=NORMAL;

/* Survival (duration) analysis with PROC PHREG. Its MODEL statement uses the
   time*censor(values) response form rather than events/trials syntax, so the
   same response specification as the LIFEREG step is used. */
PROC PHREG;
    MODEL dv*censored(0) = iv1 iv2;

T-TEST
/* One sample */
PROC TTEST H0=20 ALPHA=.01;
    VAR lung;

/* One sample, for aggregate data: count is supplied through FREQ. */
PROC TTEST H0=20 ALPHA=.01;
    VAR lung;
    FREQ count;

/* PROC UNIVARIATE on lung with MU0=600 and the NORMAL option. */
PROC UNIVARIATE MU0=600 VARDEF=DF NORMAL ALPHA=.01;
    VAR lung;

/* PROC MEANS requesting the mean, standard deviation, standard error,
   t statistic, its p-value and confidence limits for lung. */
PROC MEANS MEAN STD STDERR T VARDEF=DF PROBT CLM ALPHA=.01;
    VAR lung;

PROC TTEST; /* paired samples */
PAIRED pre*post0;
PAIRED (a b)*(c d); /* a*c, a*d, b*c, b*d */

/* Two independent samples, grouped by sex. */
PROC TTEST;
    CLASS sex;
    VAR mean;

/* Same comparison with the COCHRAN option, for unequal variance. */
PROC TTEST DATA=kucc625 COCHRAN;
    CLASS sex;
    VAR mean;

MULTIVARIATE ANALYSIS
/* Karl Pearson correlation between korean and english, with covariances. */
PROC CORR DATA=kucc625 PEARSON COV;
    VAR korean english;

/* Factor analysis; the varimax rotation is requested through the ROTATE= option. */
PROC FACTOR DATA=kucc625 ROTATE=VARIMAX;

/* Discriminant analysis. The CLASS statement takes the grouping variable
   directly, without an equals sign. */
PROC DISCRIM METHOD=NORMAL;
    CLASS types;
    VAR x1-x5;

/* Canonical correlation analysis. The prefix and name options belong to the
   PROC CANCORR statement itself, so the statement is terminated only after
   them. */
PROC CANCORR
        VPREFIX=Physical VNAME='Physical Measurement'
        WPREFIX=Psychological WNAME='Psychological Measurement';
    VAR phy1 phy2 phy3;
    WITH psy1 psy2 psy3 psy4;

NONPARAMETRIC ANALYSIS
/* Wilcoxon's rank-sum test of mean across the sex groups. */
PROC NPAR1WAY DATA=kucc625 WILCOXON;
    CLASS sex;
    VAR mean;

/* Kruskal-Wallis test */
PROC NPAR1WAY;
    CLASS sex;
    VAR mean;

CATEGORICAL DATA ANALYSIS
/* Two-way table of total by name. */
PROC FREQ DATA=kucc625 PAGE;
    TABLES total*name;

/* Two-way table with the row, column and overall percentages turned off. */
PROC FREQ DATA=kucc625 PAGE;
    TABLES mean*name / NOROW NOCOL NOPERCENT;

/* age by sex with the EXPECTED and CHISQ options. */
PROC FREQ DATA=kucc625 PAGE;
    TABLES age*sex / EXPECTED CHISQ;

/* Same table for aggregate data: count is supplied through WEIGHT. */
PROC FREQ;
    WEIGHT count;
    TABLES age*sex / EXPECTED CHISQ;

DESCRIPTIVE STATISTICS
/* Double-spaced listing without observation numbers; korean is summed. */
PROC PRINT DATA=kucc625 NOOBS DOUBLE;
    VAR korean;
    ID name;
    SUM korean;

/* Summary of korean and english by sex, written to kucc625.sum. */
PROC SUMMARY;
    CLASS sex;
    VAR korean english;
    OUTPUT OUT=kucc625.sum;

/* PROC UNIVARIATE with PLOT, to draw graphs for total. */
PROC UNIVARIATE DATA=kucc625 PLOT;
    VAR total;
    ID name;

/* Same PROC SUMMARY step as above, repeated. */
PROC SUMMARY;
    CLASS sex;
    VAR korean english;
    OUTPUT OUT=kucc625.sum;

/* Correlation and covariances, with a PARTIAL statement naming x9-x10. */
PROC CORR COV;
    VAR x1-x10;
    PARTIAL x9-x10;

/* Cronbach's coefficient alpha */
PROC CORR ALPHA;
    VAR x1-x10;

/* Save output: means of x and y per BY group go to agg_data as xBar and yBar. */
PROC MEANS;
    VAR x y;
    BY year;
    OUTPUT OUT=agg_data MEAN=xBar yBar;

/* Table with sex and ALL as rows and the MEAN and STD of score as columns. */
PROC TABULATE;
    CLASS sex;
    VAR score;
    TABLE sex ALL, score*(MEAN STD);

DATA MANIPULATION
/* Sort kucc625 by descending sex, then name, writing the result to
   kucc625.srt. The PROC statement is closed with a semicolon before BY. */
PROC SORT DATA=kucc625 OUT=kucc625.srt;
    BY DESCENDING sex name;

/* Define a numeric value format for the codes 0 and 1.
   NOTE(review): this assumes gender is a numeric 0/1 variable. If it is
   character, use VALUE $gender_fmt '0'='Female' '1'='Male'; and
   FORMAT gender $gender_fmt.; instead. */
PROC FORMAT;
    VALUE gender_fmt 0='Female' 1='Male';

/* Attach the format as a value label. A format reference ends with a period. */
DATA example;
    SET original;
    FORMAT gender gender_fmt.;

/* Assign variable labels to campus and school, then remove the labels on
   college and department. */
DATA example;
    SET original;
    LABEL campus='IU Campus'
          school='School';
    LABEL college= department=;

/* Draw a sample from original into sample: METHOD=PPS with ids as the SIZE
   variable, SAMPSIZE=100, stratified by state, with a fixed seed. */
PROC SURVEYSELECT
        DATA=original
        OUT=sample
        METHOD=PPS
        SAMPSIZE=100
        SEED=1234567;
    SIZE ids;
    STRATA state;

PLOT/CHART
/* Overlay two plots against sex: korean drawn with '*', english with '#'. */
PROC PLOT;
    PLOT korean*sex='*'
         english*sex='#' / OVERLAY;

/* Vertical bar charts of total and mean, and a horizontal bar chart of sex
   with the DISCRETE option. */
PROC CHART;
    VBAR total / LEVELS=3;
    VBAR mean / MIDPOINTS=20 60 90;
    HBAR sex / DISCRETE;