Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • HELP: Reshaping + Cleaning Data

    Hello Statlist,

    I'm am currently working on reshaping the following dataset:

    Code:
    * Example generated by -dataex-. To install: ssc install dataex
    clear
    input str14 A str6(B C D E F)
    "County / Month" "Jan-14" "Feb-14" "Mar-14" "Apr-14" "May-14"
    "Adams"          "2388"   "2380"   "2339"   "2308"   "2308"  
    "(1) Women"      "538"    "534"    "538"    "542"    "521"   
    "(2) Infants"    "399"    "412"    "406"    "389"    "389"   
    "(3) Children"   "1451"   "1434"   "1395"   "1377"   "1398"  
    "Asotin"         "569"    "582"    "586"    "593"    "558"   
    "(1) Women"      "145"    "137"    "141"    "143"    "131"   
    "(2) Infants"    "107"    "118"    "125"    "123"    "116"   
    "(3) Children"   "317"    "327"    "320"    "327"    "311"   
    "Benton"         "5229"   "5013"   "4802"   "4837"   "4837"  
    "(1) Women"      "1203"   "1121"   "1110"   "1142"   "1141"  
    "(2) Infants"    "1114"   "1068"   "1017"   "1053"   "1032"  
    "(3) Children"   "2912"   "2824"   "2675"   "2642"   "2664"  
    "Chelan"         "4389"   "4363"   "4396"   "4385"   "4269"  
    "(1) Women"      "941"    "953"    "955"    "952"    "937"   
    "(2) Infants"    "822"    "828"    "819"    "805"    "793"   
    "(3) Children"   "2626"   "2582"   "2622"   "2628"   "2539"  
    "Clallam"        "1515"   "1540"   "1553"   "1571"   "1550"  
    "(1) Women"      "391"    "396"    "390"    "389"    "379"   
    "(2) Infants"    "349"    "349"    "355"    "363"    "351"   
    "(3) Children"   "775"    "795"    "808"    "819"    "820"   
    "Clark"          "10390"  "10186"  "10070"  "10251"  "10263" 
    "(1) Women"      "2443"   "2357"   "2320"   "2370"   "2349"  
    "(2) Infants"    "2131"   "2084"   "2091"   "2110"   "2071"  
    "(3) Children"   "5816"   "5745"   "5659"   "5771"   "5843"  
    "Columbia"       "107"    "106"    "95"     "95"     "94"    
    "(1) Women"      "22"     "23"     "20"     "18"     "19"    
    "(2) Infants"    "23"     "21"     "17"     "15"     "15"    
    "(3) Children"   "62"     "62"     "58"     "62"     "60"    
    "Cowlitz"        "3687"   "3638"   "3617"   "3642"   "3579"  
    "(1) Women"      "777"    "768"    "763"    "791"    "770"   
    "(2) Infants"    "756"    "754"    "726"    "711"    "706"   
    "(3) Children"   "2154"   "2116"   "2128"   "2140"   "2103"  
    "Douglas"        "0"      "3"      "68"     "159"    "167"   
    "(1) Women"      "0"      "1"      "23"     "44"     "45"    
    "(2) Infants"    "0"      "1"      "15"     "28"     "30"    
    "(3) Children"   "0"      "1"      "30"     "87"     "92"    
    "Ferry"          "89"     "90"     "89"     "89"     "91"    
    "(1) Women"      "26"     "25"     "21"     "24"     "24"    
    "(2) Infants"    "18"     "17"     "12"     "14"     "16"    
    "(3) Children"   "45"     "48"     "56"     "51"     "51"    
    "Franklin"       "6878"   "6846"   "6859"   "6916"   "6983"  
    "(1) Women"      "1479"   "1463"   "1484"   "1508"   "1539"  
    "(2) Infants"    "1232"   "1226"   "1223"   "1242"   "1242"  
    "(3) Children"   "4167"   "4157"   "4152"   "4166"   "4202"  
    "Garfield"       "60"     "57"     "58"     "53"     "62"    
    "(1) Women"      "12"     "10"     "10"     "10"     "12"    
    "(2) Infants"    "15"     "12"     "12"     "10"     "9"     
    "(3) Children"   "33"     "35"     "36"     "33"     "41"    
    "Grant"          "5626"   "5507"   "5481"   "5466"   "5456"  
    "(1) Women"      "1181"   "1153"   "1149"   "1148"   "1142"  
    "(2) Infants"    "996"    "956"    "963"    "977"    "993"   
    "(3) Children"   "3449"   "3398"   "3369"   "3341"   "3321"  
    "Grays Harbor"   "2565"   "2499"   "2464"   "2484"   "2462"  
    "(1) Women"      "536"    "524"    "522"    "521"    "522"   
    "(2) Infants"    "500"    "485"    "477"    "474"    "461"   
    "(3) Children"   "1529"   "1490"   "1465"   "1489"   "1479"  
    "Island"         "1791"   "1746"   "1758"   "1775"   "1749"  
    "(1) Women"      "454"    "435"    "437"    "446"    "448"   
    "(2) Infants"    "371"    "369"    "354"    "369"    "360"   
    "(3) Children"   "966"    "942"    "967"    "960"    "941"   
    "Jefferson"      "476"    "460"    "459"    "428"    "428"   
    "(1) Women"      "111"    "113"    "107"    "101"    "97"    
    "(2) Infants"    "102"    "102"    "95"     "83"     "85"    
    "(3) Children"   "263"    "245"    "257"    "244"    "246"   
    "King"           "38734"  "38011"  "38384"  "38345"  "38473" 
    "(1) Women"      "9200"   "8894"   "9061"   "9066"   "9024"  
    "(2) Infants"    "7967"   "7808"   "7842"   "7860"   "7800"  
    "(3) Children"   "21567"  "21309"  "21481"  "21419"  "21649" 
    "Kitsap"         "5464"   "5294"   "5243"   "5404"   "5413"  
    "(1) Women"      "1343"   "1283"   "1290"   "1321"   "1331"  
    "(2) Infants"    "1142"   "1111"   "1092"   "1128"   "1145"  
    "(3) Children"   "2979"   "2900"   "2861"   "2955"   "2937"  
    "Kittitas"       "857"    "820"    "838"    "811"    "837"   
    "(1) Women"      "195"    "190"    "199"    "198"    "200"   
    "(2) Infants"    "167"    "158"    "167"    "164"    "165"   
    "(3) Children"   "495"    "472"    "472"    "449"    "472"   
    "Klickitat"      "655"    "637"    "627"    "611"    "605"   
    "(1) Women"      "155"    "148"    "152"    "145"    "134"   
    "(2) Infants"    "120"    "126"    "126"    "123"    "126"   
    "(3) Children"   "380"    "363"    "349"    "343"    "345"   
    "Lewis"          "2596"   "2520"   "2534"   "2499"   "2500"  
    "(1) Women"      "568"    "537"    "544"    "535"    "540"   
    "(2) Infants"    "529"    "522"    "519"    "499"    "489"   
    "(3) Children"   "1499"   "1461"   "1471"   "1465"   "1471"  
    "Lincoln"        "213"    "215"    "219"    "221"    "223"   
    "(1) Women"      "40"     "39"     "39"     "35"     "37"    
    "(2) Infants"    "43"     "40"     "37"     "42"     "38"    
    "(3) Children"   "130"    "136"    "143"    "144"    "148"   
    "Mason"          "1625"   "1650"   "1647"   "1682"   "1661"  
    "(1) Women"      "343"    "352"    "362"    "369"    "365"   
    "(2) Infants"    "347"    "353"    "351"    "366"    "352"   
    "(3) Children"   "935"    "945"    "934"    "947"    "944"   
    "Okanogan"       "2393"   "2347"   "2315"   "2183"   "2197"  
    "(1) Women"      "498"    "493"    "479"    "461"    "456"   
    "(2) Infants"    "402"    "390"    "385"    "381"    "374"   
    "(3) Children"   "1493"   "1464"   "1451"   "1341"   "1367"  
    "Pacific"        "477"    "465"    "497"    "490"    "465"   
    "(1) Women"      "113"    "109"    "118"    "125"    "117"   
    "(2) Infants"    "87"     "89"     "96"     "97"     "92"    
    end
    Into something that looks similar to this dataset:

    Code:
    * Example generated by -dataex-. To install: ssc install dataex
    clear
    input str28 county int year byte month double(wic_child wic_infant wic_women)
    "Alameda County" 2010  1 20591 7066 9131
    "Alameda County" 2010  2 20666 6958 9099
    "Alameda County" 2010  3 20917 7026 9299
    "Alameda County" 2010  4 21061 6886 9212
    "Alameda County" 2010  5 20911 6748 9043
    "Alameda County" 2010  6 21253 6774 9134
    "Alameda County" 2010  7 21146 6705 9173
    "Alameda County" 2010  8 21229 6639 9078
    "Alameda County" 2010  9 21322 6687 9161
    "Alameda County" 2010 10 21191 6646 9067
    "Alameda County" 2010 11 21276 6617 8947
    "Alameda County" 2010 12 21141 6609 8904
    "Alameda County" 2011  1 21392 6662 8933
    "Alameda County" 2011  2 20955 6582 8814
    "Alameda County" 2011  3 21296 6648 8904
    "Alameda County" 2011  4 21582 6686 9014
    "Alameda County" 2011  5 21572 6603 8933
    "Alameda County" 2011  6 21815 6586 9044
    "Alameda County" 2011  7 21704 6547 8913
    "Alameda County" 2011  8 21836 6562 8956
    "Alameda County" 2011  9 21841 6519 9043
    "Alameda County" 2011 10 21906 6600 9037
    "Alameda County" 2011 11 22090 6576 9001
    "Alameda County" 2011 12 21861 6654 9073
    "Alameda County" 2012  1 22104 6692 9135
    "Alameda County" 2012  2 21971 6667 9048
    "Alameda County" 2012  3 22190 6710 9152
    "Alameda County" 2012  4 22025 6608 9084
    "Alameda County" 2012  5 22493 6653 9172
    "Alameda County" 2012  6 22374 6677 9154
    "Alameda County" 2012  7 22239 6570 9103
    "Alameda County" 2012  8 22496 6583 9121
    "Alameda County" 2012  9 21989 6597 8987
    "Alameda County" 2012 10 21766 6680 8998
    "Alameda County" 2012 11 21537 6597 8807
    "Alameda County" 2012 12 21008 6482 8520
    "Alameda County" 2013  1 21458 6656 8834
    "Alameda County" 2013  2 20802 6529 8564
    "Alameda County" 2013  3 20683 6566 8511
    "Alameda County" 2013  4 21144 6530 8558
    "Alameda County" 2013  5 21260 6469 8617
    "Alameda County" 2013  6 21217 6386 8476
    "Alameda County" 2013  7 21587 6399 8628
    "Alameda County" 2013  8 21198 6295 8419
    "Alameda County" 2013  9 21271 6230 8507
    "Alameda County" 2013 10 21070 6103 8437
    "Alameda County" 2013 11 20084 5968 8159
    "Alameda County" 2013 12 20200 5912 8141
    "Alameda County" 2014  1 21208 6072 8552
    "Alameda County" 2014  2 22062 6413 8910
    "Alameda County" 2014  3 21784 6530 8908
    "Alameda County" 2014  4 20948 6332 8783
    "Alameda County" 2014  5 19994 6109 8428
    "Alameda County" 2014  6 19598 6074 8344
    "Alameda County" 2014  7 19764 6068 8409
    "Alameda County" 2014  8 19648 6090 8318
    "Alameda County" 2014  9 19659 6048 8341
    "Alameda County" 2014 10 19762 6137 8324
    "Alameda County" 2014 11 19425 6099 8088
    "Alameda County" 2014 12 19080 6065 7915
    "Alameda County" 2015  1 19221 6089 8026
    "Alameda County" 2015  2 18802 6033 7844
    "Alameda County" 2015  3 18623 6032 7730
    "Alameda County" 2015  4 18938 6093 7870
    "Alameda County" 2015  5 18453 5917 7594
    "Alameda County" 2015  6 18607 5808 7688
    "Alameda County" 2015  7 18288 5686 7622
    "Alameda County" 2015  8 18324 5654 7558
    "Alameda County" 2015  9 18424 5814 7592
    "Alameda County" 2015 10 18121 5648 7470
    "Alameda County" 2015 11 17397 5465 7085
    "Alameda County" 2015 12 17279 5447 7061
    "Alameda County" 2016  1 17504 5527 7230
    "Alameda County" 2016  2 17300 5464 7141
    "Alameda County" 2016  3 17321 5607 7126
    "Alameda County" 2016  4 17218 5537 7225
    "Alameda County" 2016  5 18777 6070 7975
    "Alameda County" 2016  6 17965 5980 7543
    "Alameda County" 2016  7 16764 5602 6988
    "Alameda County" 2016  8 16355 5263 6783
    "Alameda County" 2016  9 16210 5204 6845
    "Alameda County" 2016 10 15840 5142 6637
    "Alameda County" 2016 11 15342 5051 6378
    "Alameda County" 2016 12 15063 4986 6293
    "Alameda County" 2017  1 15377 4964 6441
    "Alameda County" 2017  2 15050 4957 6321
    "Alameda County" 2017  3 15437 5182 6457
    "Alameda County" 2017  4 14771 5047 6224
    "Alameda County" 2017  5 14600 4986 6118
    "Alameda County" 2017  6 14394 4905 6076
    "Alameda County" 2017  7 14127 4899 5993
    "Alameda County" 2017  8 14370 4838 6020
    "Alameda County" 2017  9 14102 4792 5903
    "Alameda County" 2017 10 13942 4689 5794
    "Alameda County" 2017 11 13594 4516 5544
    "Alameda County" 2017 12 13354 4411 5350
    "Alameda County" 2018  1 13998 4572 5608
    "Alameda County" 2018  2 13986 4586 5578
    "Alameda County" 2018  3 14306 4641 5663
    "Alameda County" 2018  4 14243 4636 5628
    end
    So just to give a bit of context, what I need help with is the following:
    • Reshaping the data so it is long instead of wide
    • Creating a new variable for county that repeats for each observation
    • Creating a new variable for year and for month
    • Putting Women, Infant, and Children as variable names
    Hopefully this makes, sense but I would appreciate any help.



  • #2
    The following example will start you toward your solution.
    Code:
    foreach v of varlist * {
        if "`v'"=="A" continue
        local new = subinstr(`v'[1],"-","",.)
        replace `v' = "0" in 1
        rename `v' n_`new'
    }
    drop in 1
    generate str14 county = ""
    replace county = cond(substr(A,1,1)!="(",A,county[_n-1])
    order county
    drop if A==county
    replace A = substr(A,5,.)
    generate id = _n
    reshape long n_, i(id) j(date_s) str
    drop id
    reshape wide n_, i(county date_s) j(A) str
    destring n_*, replace
    generate int month = monthly(date_s,"M20Y")
    format month %tm
    drop date_s
    order county month 
    sort county month
    describe
    list in 1/15, noobs abbreviate(12)
    Code:
    . describe
    
    Contains data
      obs:           125                          
     vars:             5                          
    ------------------------------------------------------------------------------------------------
                  storage   display    value
    variable name   type    format     label      variable label
    ------------------------------------------------------------------------------------------------
    county          str14   %14s                  
    month           int     %tm                   
    n_Children      int     %10.0g                Children n_
    n_Infants       int     %10.0g                Infants n_
    n_Women         int     %10.0g                Women n_
    ------------------------------------------------------------------------------------------------
    Sorted by: county  month
         Note: Dataset has changed since last saved.
    
    . list in 1/15, noobs abbreviate(12)
    
      +----------------------------------------------------+
      | county    month   n_Children   n_Infants   n_Women |
      |----------------------------------------------------|
      |  Adams   2014m1         1451         399       538 |
      |  Adams   2014m2         1434         412       534 |
      |  Adams   2014m3         1395         406       538 |
      |  Adams   2014m4         1377         389       542 |
      |  Adams   2014m5         1398         389       521 |
      |----------------------------------------------------|
      | Asotin   2014m1          317         107       145 |
      | Asotin   2014m2          327         118       137 |
      | Asotin   2014m3          320         125       141 |
      | Asotin   2014m4          327         123       143 |
      | Asotin   2014m5          311         116       131 |
      |----------------------------------------------------|
      | Benton   2014m1         2912        1114      1203 |
      | Benton   2014m2         2824        1068      1121 |
      | Benton   2014m3         2675        1017      1110 |
      | Benton   2014m4         2642        1053      1142 |
      | Benton   2014m5         2664        1032      1141 |
      +----------------------------------------------------+

    Comment

    Working...
    X