I agree with the last comment of Oded Mcdossi....... that is good
-
Login or Register
- Log in with
capture program drop coo_prx program coo_prx syntax anything, sum(integer) Measure(string) // coo_prx matrixname, sum(number_of_cases) m(measure_type) loc COO `anything' // co-occurance matrix parse_dissim `measure' loc measure `s(dist)' if "`s(binary)'"=="" { di as error "only binary measures accepted" exit } qui mat list `COO' // assert matrix exists if issymmetric(`COO')!=1 { di as error "matrix `C' not symmetric" exit } mata: dissmat("`COO'", `sum', "`measure'") mat coln PRX=`:colnames `COO'' mat rown PRX=`:rownames `COO'' mat list PRX, f(%8.4f) // proximity matrix stored in PRX end mata: function dissmat(string scalar COO, real scalar sum, string scalar measure) { measure=st_local("measure") COO=st_matrix(COO) PRX=J(cols(COO),rows(COO),.) for (i=1;i<=rows(COO);i++) { for (j=1;j<=rows(COO);j++) { if (i!=j) { a=COO[i,j] b=COO[j,j]; b=b-a c=COO[i,i]; c=c-a d=sum-(a+b+c) if (measure=="matching") PRX[i,j]=(a+d)/(a+b+c+d) else if (measure=="Jaccard") PRX[i,j]=a/(a+b+c) else if (measure=="Russell") PRX[i,j]=a/(a+b+c+d) else if (measure=="Hamann") PRX[i,j]=((a+d)-(b+c))/(a+b+c+d) else if (measure=="Dice") PRX[i,j]=(2*a)/(2*a+b+c) else if (measure=="antiDice") PRX[i,j]=a/(a+2*(b+c)) else if (measure=="Sneath") PRX[i,j]=(2*(a+d))/(2*(a+d)+(b+c)) else if (measure=="Rogers") PRX[i,j]=(a+d)/((a+d)+2*(b+c)) else if (measure=="Ochiai") PRX[i,j]=a/sqrt((a+b)*(a+c)) else if (measure=="Yule") PRX[i,j]=(a*d-b*c)/(a*d+b*c) else if (measure=="Anderberg") PRX[i,j]=(a/(a+b) + a/(a+c) + d/(c+d) + d/(b+d))/4 else if (measure=="Kulczynski") PRX[i,j]=(a/(a+b) + a/(a+c))/2 else if (measure=="Pearson") PRX[i,j]=(a*d-b*c)/sqrt((a+b)*(a+c)*(d+b)*(d+c)) else if (measure=="Gower2") PRX[i,j]=a*d/sqrt((a+b)*(a+c)*(d+b)*(d+c)) else _error(3888) } else { PRX[i,j]=0 } } } st_matrix("PRX",PRX) } end
clear mat drop _all // Some random data set obs 10000 loc n=70 forval i=1/`n' { gen v`i'=floor(2*runiform()) } // produce co-occurrence matrix mat C=J(`n',`n',.) loc nlist "" forval i=1/`n' { loc nlist="`nlist' v`i'" } mat coln C=`nlist' mat rown C=`nlist' forval i=1/`n'{ forval j=1/`n' { if (`i'!=`j') { qui count if (v`i'==v`j' & v`i'==1) mat C[`i',`j']=`r(N)' } else { qui count if v`i'==1 mat C[`i',`i']=`r(N)' } } } // run the program coo_prx C, sum(1000) m(match) // sum() takes any positive integers, and m() takes any of the binary similarity measures
capture program drop todistance program define todistance, rclass // to convert a coocurrence matrix into distances if "A`2'"=="A" { local 2 trace(`1') } matrix J=diag(J(1,`=rowsof(`1')',1)) matrix K=diag(J(1,`=rowsof(`1')',1)) forvalues X=2/`=rowsof(`1')' { forvalues Y=1/`=`X'-1' { matrix O`X'_`Y'=J(2,2,.) matrix O`X'_`Y'=J(2,2,.) matrix O`X'_`Y'[1,1]=`1'[`X',`Y'] matrix O`X'_`Y'[1,2]=`1'[`X',`X']-`1'[`X',`Y'] matrix O`X'_`Y'[2,2]=`2'-`1'[`X',`X']-`1'[`Y',`Y']+`1'[`X',`Y'] matrix O`X'_`Y'[2,1]=`1'[`Y',`Y']-`1'[`X',`Y'] matrix rownames O`X'_`Y'=Paper`X'(Si) Paper`X'(No) matrix colnames O`X'_`Y'=Paper`Y'(Si) Paper`Y'(No) // matlist O`X'_`Y' matrix J[`X',`Y']=O`X'_`Y'[1,1]/(O`X'_`Y'[1,1]+O`X'_`Y'[1,2]+O`X'_`Y'[2,1]) matrix J[`Y', `X']=J[`X',`Y'] matrix K[`X',`Y']=((O`X'_`Y'[1,1]/(O`X'_`Y'[1,1]+O`X'_`Y'[1,2]))+(O`X'_`Y'[1,1]/(O`X'_`Y'[1,1]+O`X'_`Y'[2,1])))/2 matrix K[`Y', `X']=K[`X',`Y'] return matrix O`X'_`Y'=O`X'_`Y' } } matrix colnames J=`:colnames(`1')' matrix rownames J=`:rownames(`1')' matrix colnames K=`:colnames(`1')' matrix rownames K=`:rownames(`1')' matlist J, title("Jaccard distances") format(%3.2f) matlist K, title("Kulczynski distances")format(%3.2f) end // Input the coocurrence matrix with input (or with the data editor). clear input p1 p2 p3 p4 60 10 20 25 10 50 30 15 20 30 40 12 25 15 12 30 end // Convert your data file into a matrix (A) mkmat p1-p4, mat(A) matrix rownames A=`:colnames(A)' //Convert your coocurrence matrix (similarity matrix, but without diagonal of 1's //into a distance (dissimilarity) matrix //(Note that in this case you obtain Jaccard -J- and Kulczynski -K-) todistance A //you may add a number if the total is diferent to the sum of the diagonal // Once you have J (Jaccard) and K (Kulczynski), you can apply MDS to these distances matrices. mdsmat J, s2d(standard) // you can add other options as convenient (see help mdsmat) mdsconfig, name(Jacard, replace) mdsmat K, s2d(standard) mdsconfig, name(Kulczynski, replace)
mat A=(.,20,20,10\20,.,15,10\20,15,.,25\10,10,25,.) // fake co-occurrence matrix with missing along the diagonal mat list A // display matrix A mata: st_matrix("B",rowsum(st_matrix("A"))) // returns the row sums in 4x1 matrix named B; see *1* for Stata equivalent // this loop fills the diagonal with row sums forval i=1/`=colsof(A)' { mat A[`i',`i']=B[`i',1] } // calculate the grand total from the diagonal (could just use matrix B, but let's pretend we don't have B); see *2* for Stata equivalent mat C=vecdiag(A) // extracts the diagonal in a 1x4 matrix named C mata: st_local("ttl",strofreal(rowsum(st_matrix("C"))/2)) // calculate grand total and store in Stata local `ttl' mat list A /*------------ symmetric A[4,4] c1 c2 c3 c4 r1 50 r2 20 45 r3 20 15 60 r4 10 10 25 45 ----------------*/ /*---------------- v1 1 0 v2 1 a b 0 c d ----------------*/ // calcuate a-d based on A[2,1] loc a=A[2,1] // 20 loc b=A[2,2]-`a' // 25 (A[2,2] from the diagonal) loc c=A[1,1]-`a' // 30 (A[1,1] from the diagonal) loc d=`ttl'-(`a'+`b'+`c') // 25 (`ttl' from the grand total) di "| a=`a' | b=`b' | c=`c' | d=`d' |" //----Stata equivalent of mata lines----// *1* /*--- mat B=J(`=colsof(A)',1,0) forval i=1/`=colsof(A)' { forval j=1/`=colsof(A)' { if `i'!=`j' { mat B[`i',1]=B[`i',1]+A[`i',`j'] } } } ----*/ *2* /*---- loc ttl=0 forval i=1/`=colsof(C)' { loc ttl=`ttl'+A[`i',`i'] } loc ttl=`ttl'/2 ----*/
PaperA | PaperB | PaperC | PaperD | |
Paper1 | 0 | 0 | 0 | 0 |
Paper2 | 1 | 0 | 0 | 0 |
Paper3 | 1 | 1 | 0 | 0 |
Paper4 | 1 | 1 | 1 | 0 |
Paper5 | 1 | 1 | 1 | 1 |
Paper1 | Paper2 | Paper3 | Paper4 | Paper5 | |
Paper1 | 0 | 0 | 0 | 0 | 0 |
Paper2 | 0 | 1 | 1 | 1 | 1 |
Paper3 | 0 | 1 | 2 | 2 | 2 |
Paper4 | 0 | 1 | 2 | 3 | 3 |
Paper5 | 0 | 1 | 2 | 3 | 4 |
net install coin, from(http://sociocav.usal.es/stata/)
. clear . net install coin, from(http://sociocav.usal.es/stata) replace . input p1 p2 p3 p4 co 1. 1 1 0 0 10 2. 1 0 1 0 20 3. 1 0 0 1 25 4. 0 1 1 0 30 5. 0 1 0 1 15 6. 0 0 1 1 12 7. end . coin p1-p4 [fweight=co], f plot(pca) p(.65) 112 scenarios. 2 p<=.65 coincidences amongst 4 events. Density: 0.33 4 events(n>=5): p1 p2 p3 p4 Frequencies | p1 p2 p3 p4 ---------------------+---------------------------- p1 | 55 p2 | 10 55 p3 | 20 30 62 p4 | 25 15 12 52
Comment