How many distinct values does the variable you are working with take? (That is, the number of categories, not the number of observations.)
-
Login or Register
- Log in with
* Example data set produced by -dataex-.
* To install: ssc install dataex
* Variables read below: patnum, permno (long), year (int), class (str3).
clear
input long patnum long permno int year str3 class
1706123 10006 1921 "251"
1579225 10006 1922 "137"
1699538 10006 1922 "198"
1605442 10006 1922 "164"
1579247 10006 1922 "072"
1699546 10006 1923 "403"
1748147 10006 1923 "105"
1727684 10006 1923 "220"
1665389 10006 1923 "403"
1665388 10006 1923 "105"
1876807 10006 1923 "108"
1605415 10006 1923 "267"
1579325 10006 1923 "072"
1605417 10006 1923 "403"
1579268 10006 1923 "105"
1631340 10006 1923 "187"
1579234 10006 1923 "074"
1649439 10006 1924 "384"
1748114 10006 1924 "296"
1649395 10006 1925 "105"
1665392 10006 1925 "052"
2085621 10006 1925 "105"
1760688 10006 1925 "292"
1626654 10006 1925 "105"
1665368 10006 1925 "220"
1649431 10006 1925 "005"
1665407 10006 1925 "180"
1626653 10006 1925 "105"
1685132 10006 1925 "220"
1616582 10006 1925 "105"
1631309 10006 1925 "454"
1665391 10006 1925 "137"
1579214 10006 1925 "148"
1685111 10006 1925 "220"
1631313 10006 1926 "105"
1631314 10006 1926 "105"
1685126 10006 1926 "411"
1605410 10006 1926 "295"
1727638 10006 1926 "105"
1649434 10006 1926 "188"
end
*! 1.0.0 NJC 9 January 2019
*! entropyetc 2.0.0 NJC 5 July 2018
*! entropyetc 1.0.0 NJC 20 November 2016

* entropyetc_ varname [if] [in] [aweight fweight] [, by() generate() format() tabdisp_options]
*
* For a categorical variable (numeric or string), optionally within groups
* defined by by(), computes from the category proportions p_j:
*   Shannon H  = sum of -p_j ln p_j      exp(H)
*   Simpson    = sum of p_j^2            1/Simpson
*   dissim.    = (sum of |p_j - 1/J|) / 2, where J is the number of distinct
*                categories in the estimation sample
* and shows them with -tabdisp-.
*
* Options
*   by(varlist)    groups within which the measures are calculated
*   generate(str)  1 to 5 tokens of the form #=newvar, # in 1..5 selecting
*                  (1) Shannon H (2) exp(H) (3) Simpson (4) 1/Simpson
*                  (5) dissimilarity index, to be saved as new variables
*   format(str)    display format for -tabdisp- (default %4.3f)
*   *              any other options are passed through to -tabdisp-
*
* Returns
*   r(categories)  number of distinct categories J in the estimation sample
*
* Note: the data are sorted on the (temporary) group variable as a side effect.
program entropyetc_, rclass
    version 11.2
    syntax varname [if] [in] [aweight fweight] ///
        [, by(varlist) Generate(str) Format(str) * ]

    quietly {
        marksample touse, strok
        if "`by'" != "" markout `touse' `by', strok
        count if `touse'
        if r(N) == 0 error 2000

        * parsegenerate defines locals var_1 ... var_5 here via c_local
        if "`generate'" != "" parsegenerate `generate'

        tempvar group Shannon Simpson Shannon2 Simpson2 dissim categ total
        tempname recJ

        if "`by'" != "" {
            egen long `group' = group(`by') if `touse', label
            compress `group'
        }
        else {
            gen byte `group' = `touse'
            * replace: a value label with this temporary name may be left
            * over from an earlier call
            label define `group' 1 "all", replace
            label val `group' `group'
        }

        foreach s in Shannon Simpson Shannon2 Simpson2 dissim {
            gen ``s'' = 0 if `touse'
        }

        label var `Shannon' "Shannon H"
        label var `Shannon2' "exp(H)"
        label var `Simpson' "Simpson"
        label var `Simpson2' "1/Simpson"
        label var `dissim' "dissim."

        * number the categories within the estimation sample only, so that
        * J is not inflated by categories excluded through if/in or through
        * missing values of the by() variables
        egen long `categ' = group(`varlist') if `touse'
        compress `categ'
        su `categ', meanonly
        local J = r(max)
        scalar `recJ' = 1/`J'

        * -syntax- leaves the weight expression in `exp' INCLUDING the
        * leading equals sign; strip it before using it in arithmetic
        if "`exp'" == "" local wexp 1
        else gettoken eqsign wexp : exp, parse("=")

        * p_j starts as the (weighted) count of category j in each group;
        * total accumulates the (weighted) group size
        gen `total' = 0
        forval j = 1/`J' {
            tempvar p`j'
            * parentheses matter: * binds more tightly than ==
            bysort `group' : gen `p`j'' = sum((`wexp') * (`categ' == `j'))
            by `group' : replace `p`j'' = `p`j''[_N]
            replace `total' = `total' + `p`j''
        }

        forval j = 1/`J' {
            replace `p`j'' = `p`j'' / `total'
            * p = 0 gives -0 * ln(0) = missing; max(0, .) maps that to 0
            replace `Shannon' = `Shannon' + max(0, -`p`j'' * ln(`p`j''))
            replace `Simpson' = `Simpson' + `p`j''^2
            replace `dissim' = `dissim' + abs(`p`j'' - `recJ')
        }

        replace `Simpson2' = 1/`Simpson'
        replace `Shannon2' = exp(`Shannon')
        replace `dissim' = `dissim'/2

        return scalar categories = `J'

        label var `group' "Group"
        if "`format'" == "" local format "%4.3f"
    }

    quietly if "`generate'" != "" {
        local lbl1 "Shannon H"
        local lbl2 "exp(H)"
        local lbl3 "Simpson"
        local lbl4 "1/Simpson"
        local lbl5 "dissimilarity index"

        * positional macros 1..5 now hold the five temporary variable names
        tokenize `Shannon' `Shannon2' `Simpson' `Simpson2' `dissim'

        forval j = 1/5 {
            if "`var_`j''" != "" {
                gen `var_`j'' = ``j''
                label var `var_`j'' "`lbl`j''"
            }
        }
    }

    capture noisily tabdisp `group' if `touse', ///
        c(`Shannon' `Shannon2' `Simpson' `Simpson2' `dissim') ///
        format(`format') `options'

    * do not leave a value label with a temporary name behind in the data
    capture label drop `group'
end

* parsegenerate: parse the contents of generate().
* Expects 1 to 5 tokens of the form #=newvar with # an integer from 1 to 5.
* For each token, checks that newvar is a valid new variable name and sets
* local var_# to newvar in the CALLER's namespace (via c_local).
* Exits with error 134 if more than five tokens are given.
program parsegenerate
    tokenize `0'

    if "`6'" != "" {
        di as err "generate() should specify 1 to 5 tokens"
        exit 134
    }

    forval j = 1/5 {
        if "``j''" != "" {
            * split token j at the equals sign: number, "=", new name
            gettoken no rest : `j', parse(=)
            capture numlist "`no'", max(1) int range(>=1 <=5)
            if _rc {
                di as err "generate() error: ``j''"
                exit _rc
            }

            gettoken eqs rest : rest, parse(=)
            confirm new var `rest'

            c_local var_`no' "`rest'"
        }
    }
end
* Timing comparison of -entropyetc- (must already be installed) and the
* -entropyetc_- program on the same examples.
* -set rmsg on- makes Stata report the elapsed time of each command.
set rmsg on

* --- entropyetc ---
sysuse auto, clear
entropyetc rep78
entropyetc rep78, by(foreign)

* clear: without it -webuse- refuses to load when the data in memory have changed
webuse nlsw88, clear
entropyetc occupation, by(industry) gen(2=numeq)
* tag one observation per industry so each industry is plotted once
egen tag = tag(industry)
graph dot (asis) numeq if tag, over(industry, sort(1) descending) linetype(line)

* --- entropyetc_ ---
sysuse auto, clear
entropyetc_ rep78
entropyetc_ rep78, by(foreign)

webuse nlsw88, clear
entropyetc_ occupation, by(industry) gen(2=numeq)
egen tag = tag(industry)
graph dot (asis) numeq if tag, over(industry, sort(1) descending) linetype(line)
Comment