/*
GINI CODE
=========
This SAS code was written by Philip N. Cohen. It is meant to be adaptable
to various units of analysis and measures of interest. The Gini coefficient
can be calculated for lots of different distributions, although it is most
often used for income.

The formula used here is from _The methods and materials of demography_,
by Henry S. Shryock, Jacob S. Siegel, and associates. Orlando, FL:
Academic Press, 1976 (p. 98).

(The author of the code can take no responsibility for its reliability or
accuracy, or for the results obtained with its use; but he would be glad to
take partial credit for its successful use or adaptation.)
*/

/*
Settings. The defaults reproduce the original program: input data set TEMP,
measure CAPINC, population weight CAPWGT. Substitute your own data set,
measure and weight here; these are the only names you have to change.
*/
%let gini_data = temp;
%let gini_var  = capinc;
%let gini_wgt  = capwgt;

/*
Build a table with one line for each level of income, the number of
(weighted) people with that income (COUNT), and the percent with that
income (PERCENT). COUNT and PERCENT are created by PROC FREQ.
Note: the 7.0 format makes PROC FREQ group incomes by their formatted
(whole-number) value.
*/
title 'Income distribution';

proc freq data=&gini_data;
    tables &gini_var / noprint out=table;
    format &gini_var 7.0;
    weight &gini_wgt;
run;

/*
Cumulative columns.
  suminc = cumulative income at each point in the distribution
  perpop = cumulative percent of population at each point
Sum statements start at 0, retain their value across rows and ignore
missing terms, so no RETAIN statement is needed.

The running total on the LAST record is the total income. It is written to
the one-row data set _GINI_TOTAL. Taking it from the last record (instead
of sorting for the largest cumulative value) stays correct when the running
total does not peak at the end, for example when the measure has negative
values, and it avoids two sorts.
*/
data table       (drop=totalinc)
     _gini_total (keep=totalinc);
    set table end=lastrec;

    suminc + (&gini_var * count);
    perpop + percent;
    output table;

    if lastrec then do;
        totalinc = suminc;
        output _gini_total;
    end;
run;

/*
Attach total income to every record and compute perinc, the cumulative
percent of income at or below each point in the distribution. Variables
read by SET are retained, so reading _GINI_TOTAL once on the first
iteration makes totalinc available on every row. The table stays in its
original low-to-high order, so no re-sort is needed.
*/
data table;
    set table;
    if _n_ = 1 then set _gini_total;

    /* Guard against a zero or missing total: perinc is then left missing. */
    if not missing(totalinc) and totalinc ne 0 then
        perinc = (suminc / totalinc) * 100;
    else
        perinc = .;
run;

/*
To calculate Gini:
    sum[ X(i) * Y(i+1) ] - sum[ X(i+1) * Y(i) ]
where X is the cumulative proportion of population and Y is the cumulative
proportion of income. LAG supplies the previous row, so on row i
    columna = Y(i) * X(i-1)   and   columnb = X(i) * Y(i-1).
On the first row the lagged values are missing; the sum statements ignore
the resulting missing products, which is equivalent to starting the curve
at the origin. GINI is a running value; only the final row is the
coefficient.
*/
data ginidat;
    set table;

    xlag = lag(perpop) / 100;
    ylag = lag(perinc) / 100;

    columna = (perinc / 100) * xlag;
    columnb = (perpop / 100) * ylag;

    suma + columna;
    sumb + columnb;

    gini = suma - sumb;
run;

/* The row where 100 percent of income has been accumulated holds the result. */
title2 'Gini coefficient';
proc print data=ginidat;
    var gini;
    where perinc = 100;
run;
title2;

/*
Optional graph portion:
For graphing a Lorenz curve, reduce the table to the last occurrence of
each whole percentage of the population. That cuts it down to at most 101
records (0-100) instead of however many thousands.

The rounding is done in a DATA step, so no scratch file is written to disk
and the program does not depend on a Windows drive letter. perpop is
rounded to a whole percent and perinc to two decimals.
*/
data table;
    set table (keep=perinc perpop);
    perinc = round(perinc, 0.01);
    perpop = round(perpop, 1);
run;

proc sort data=table;
    by perpop;
run;

data table;
    set table;
    by perpop;
    if last.perpop;
run;

proc print data=table;
    var perinc perpop;
run;

/* creates a simple Lorenz curve graph */
symbol1 interpol=join width=2 value=none height=1 color=black;

proc gplot data=table;
    plot perinc*perpop=1;
run;
quit;