Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Splitting a string variable that has a space

    Hello all,

    I'm trying to split the first variable of the dataset below into two variables. As you can see, it has two parts, which are two numbers: 1 (space) 1, 1 (space) 2, and so on.
    I tried using split and substr; however, I couldn't get Stata to read the space between the two numbers so as to generate two new variables.

    So the split should divide the first variable into two variables.


    Code:
    * Example generated by -dataex-. To install: ssc install dataex
    clear
    input str105 dist str21 Superfície_total   str45 Valorcadastral str13 Valorsuelo str17 Valorconsserv str26 valorcadastral str13 valorsuelo str17 Valorconstruccion str19 dist1
    "1     1  "  "281.185  " "260,5  " "142,6  " "117,9  " "926,4  "   "507,2  " "419,2  " "1     1  " 
    "1     2  "  "105.262  " "53,3  "  "30,7  "  "21,0  "  "506,3  "   "292,0  " "199,4  " "1     2  " 
    "1     3  "  "142.322  " "95,9  "  "48,4  "  "47,4  "  "673,8  "   "339,7  " "332,9  " "1     3  " 
    "1     4  "  "210.574  " "121,4  " "57,5  "  "63,9  "  "576,6  "   "273,1  " "303,5  " "1     4  " 
    "1     5  "  "227.173  " "137,0  " "65,9  "  "71,1  "  "603,2  "   "290,1  " "313,1  " "1     5  " 
    "1     6  "  "130.012  " "63,6  "  "38,6  "  "25,1  "  "489,5  "   "296,8  " "192,7  " "1     6  " 
    "1     7  "  "98.810  "  "56,5  "  "31,0  "  "25,5  "  "571,8  "   "313,3  " "258,5  " "1     7  " 
    "1     8  "  "98.802  "  "44,4  "  "27,9  "  "16,6  "  "449,6  "   "282,1  " "167,6  " "1     8  " 
    "1     9  "  "91.118  "  "45,8  "  "28,2  "  "17,6  "  "502,5  "   "309,6  " "192,9  " "1     9  " 
    "1     10  " "94.147  "  "42,1  "  "26,1  "  "16,0  "  "446,7  "   "277,2  " "169,5  " "1     10  "
    "1     11  " "128.976  " "60,0  "  "37,5  "  "22,4  "  "464,9  "   "291,1  " "173,8  " "1     11  "
    "1     12  " "80.130  "  "41,1  "  "26,9  "  "14,2  "  "513,1  "   "335,3  " "177,7  " "1     12  "
    "1     13  " "62.619  "  "33,9  "  "20,1  "  "13,8  "  "541,5  "   "320,6  " "220,9  " "1     13  "
    "1     14  " "62.867  "  "27,2  "  "20,0  "  "7,2  "   "433,4  "   "318,2  " "115,2  " "1     14  "
    "1     15  " "93.479  "  "52,8  "  "32,5  "  "20,4  "  "565,4  "   "347,6  " "217,8  " "1     15  "
    "1     16  " "85.333  "  "44,0  "  "32,8  "  "11,2  "  "516,1  "   "384,9  " "131,1  " "1     16  "
    "1     17  " "75.759  "  "32,1  "  "23,1  "  "9,0  "   "424,4  "   "305,1  " "119,2  " "1     17  "
    "1     18  " "127.065  " "79,5  "  "36,5  "  "43,0  "  "625,4  "   "287,0  " "338,0  " "1     18  "
    "1     19  " "202.897  " "101,7  " "63,0  "  "38,7  "  "501,0  "   "310,3  " "190,7  " "1     19  "
    "1     20  " "261.627  " "163,6  " "86,4  "  "77,2  "  "625,4  "   "330,4  " "295,0  " "1     20  "
    "1     21  " "292.861  " "224,8  " "139,2  " "85,6  "  "767,6  "   "475,4  " "292,2  " "1     21  "
    "1     22  " "312.408  " "274,2  " "138,6  " "135,6  " "877,7  "   "443,6  " "434,2  " "1     22  "
    "1     23  " "373.343  " "276,8  " "160,8  " "115,9  " "741,3  "   "430,8  " "310,4  " "1     23  "
    "1     25  " "238.945  " "109,4  " "66,7  "  "42,7  "  "457,8  "   "279,3  " "178,6  " "1     25  "
    "1     26  " "215.058  " "106,2  " "59,8  "  "46,4  "  "493,9  "   "278,1  " "215,8  " "1     26  "
    "1     27  " "189.514  " "90,7  "  "49,9  "  "40,8  "  "478,7  "   "263,3  " "215,4  " "1     27  "
    "1     28  " "252.397  " "138,6  " "89,9  "  "48,7  "  "549,2  "   "356,3  " "192,9  " "1     28  "
    "1     29  " "162.197  " "81,4  "  "52,3  "  "29,1  "  "501,6  "   "322,2  " "179,4  " "1     29  "
    "1     30  " "365.783  " "250,7  " "159,6  " "91,0  "  "685,3  "   "436,4  " "248,9  " "1     30  "
    "1     31  " "157.628  " "75,1  "  "48,9  "  "26,2  "  "476,3  "   "310,0  " "166,3  " "1     31  "
    "1     32  " "322.569  " "303,9  " "160,6  " "143,3  " "942,0  "   "497,8  " "444,2  " "1     32  "
    "1     33  " "38.262  "  "22,0  "  "15,8  "  "6,1  "   "574,1  "   "414,1  " "160,0  " "1     33  "
    "1     34  " "31.473  "  "18,5  "  "13,2  "  "5,3  "   "587,4  "   "419,3  " "168,1  " "1     34  "
    "1     35  " "40.902  "  "21,8  "  "15,5  "  "6,3  "   "533,4  "   "380,0  " "153,3  " "1     35  "
    "1     36  " "353.319  " "364,5  " "128,2  " "236,3  " "1.031,6  " "362,7  " "668,7  " "1     36  "
    "1     37  " "54.819  "  "28,2  "  "18,4  "  "9,7  "   "513,7  "   "336,5  " "177,1  " "1     37  "
    "1     38  " "43.377  "  "22,9  "  "17,2  "  "5,7  "   "528,8  "   "396,8  " "132,0  " "1     38  "
    "1     39  " "56.843  "  "31,5  "  "22,3  "  "9,2  "   "554,4  "   "392,0  " "162,4  " "1     39  "
    "1     40  " "47.361  "  "29,3  "  "20,0  "  "9,2  "   "617,9  "   "423,1  " "194,8  " "1     40  "
    "1     41  " "51.903  "  "27,5  "  "20,4  "  "7,2  "   "530,2  "   "392,1  " "138,1  " "1     41  "
    "1     42  " "101.918  " "66,4  "  "35,5  "  "29,7  "  "651,4  "   "348,5  " "291,4  " "1     42  "
    "1     43  " "336.865  " "160,3  " "82,9  "  "77,4  "  "475,9  "   "246,0  " "229,9  " "1     43  "
    "1     44  " "135.384  " "62,5  "  "38,8  "  "23,8  "  "461,9  "   "286,3  " "175,6  " "1     44  "
    "1     45  " "168.848  " "66,4  "  "40,6  "  "25,8  "  "393,3  "   "240,4  " "152,9  " "1     45  "
    "1     46  " "201.482  " "104,5  " "61,5  "  "43,1  "  "518,8  "   "305,0  " "213,8  " "1     46  "
    "1     47  " "177.625  " "98,1  "  "55,5  "  "42,5  "  "552,2  "   "312,6  " "239,5  " "1     47  "
    "1     48  " "112.255  " "50,7  "  "29,9  "  "20,9  "  "452,1  "   "266,3  " "185,8  " "1     48  "
    "1     49  " "114.293  " "65,6  "  "41,8  "  "23,8  "  "573,6  "   "365,4  " "208,2  " "1     49  "
    "1     50  " "107.999  " "51,1  "  "35,7  "  "15,4  "  "473,0  "   "330,6  " "142,4  " "1     50  "
    "1     51  " "92.129  "  "42,0  "  "30,0  "  "12,0  "  "455,7  "   "325,6  " "130,1  " "1     51  "
    "1     52  " "312.260  " "195,9  " "129,0  " "66,9  "  "627,5  "   "413,2  " "214,3  " "1     52  "
    "1     53  " "112.556  " "55,8  "  "28,7  "  "26,6  "  "495,7  "   "254,6  " "236,0  " "1     53  "
    "1     54  " "75.486  "  "34,7  "  "20,8  "  "13,8  "  "459,1  "   "275,6  " "182,3  " "1     54  "
    "1     55  " "178.137  " "97,6  "  "48,6  "  "48,6  "  "547,8  "   "273,0  " "272,5  " "1     55  "
    "2     1  "  "158.974  " "126,6  " "65,8  "  "60,8  "  "796,2  "   "413,8  " "382,4  " "2     1  " 
    "2     2  "  "115.220  " "95,7  "  "61,4  "  "34,3  "  "830,9  "   "533,0  " "297,8  " "2     2  " 
    "2     3  "  "72.227  "  "63,0  "  "37,8  "  "25,1  "  "872,0  "   "523,8  " "348,2  " "2     3  " 
    "2     4  "  "84.932  "  "68,1  "  "43,7  "  "24,4  "  "801,5  "   "514,0  " "287,5  " "2     4  " 
    "2     5  "  "178.847  " "147,2  " "91,4  "  "55,7  "  "823,0  "   "511,3  " "311,6  " "2     5  " 
    "2     6  "  "119.467  " "97,6  "  "66,1  "  "31,5  "  "816,8  "   "553,2  " "263,7  " "2     6  " 
    "2     7  "  "110.336  " "89,4  "  "54,9  "  "34,5  "  "810,2  "   "497,6  " "312,6  " "2     7  " 
    "2     8  "  "156.541  " "116,6  " "71,7  "  "45,0  "  "745,0  "   "457,8  " "287,2  " "2     8  " 
    "2     9  "  "137.256  " "110,7  " "66,1  "  "44,6  "  "806,4  "   "481,3  " "325,1  " "2     9  " 
    "2     10  " "81.152  "  "60,5  "  "39,0  "  "21,6  "  "745,9  "   "480,1  " "265,8  " "2     10  "
    "2     11  " "67.179  "  "53,8  "  "33,5  "  "20,3  "  "800,5  "   "498,3  " "302,2  " "2     11  "
    "2     12  " "90.490  "  "74,0  "  "43,3  "  "30,6  "  "817,4  "   "479,0  " "338,4  " "2     12  "
    "2     13  " "125.714  " "118,5  " "50,7  "  "67,7  "  "942,3  "   "403,5  " "538,8  " "2     13  "
    "2     14  " "65.073  "  "44,7  "  "33,4  "  "11,3  "  "686,9  "   "512,8  " "174,1  " "2     14  "
    "2     15  " "95.858  "  "73,6  "  "48,4  "  "25,2  "  "767,5  "   "504,8  " "262,7  " "2     15  "
    "2     16  " "143.874  " "103,3  " "66,6  "  "36,6  "  "717,7  "   "463,0  " "254,7  " "2     16  "
    "2     17  " "143.962  " "117,5  " "76,6  "  "40,9  "  "816,4  "   "532,3  " "284,1  " "2     17  "
    "2     18  " "118.926  " "99,9  "  "66,1  "  "33,7  "  "839,8  "   "556,1  " "283,7  " "2     18  "
    "2     19  " "133.655  " "104,2  " "68,1  "  "36,1  "  "779,6  "   "509,7  " "269,9  " "2     19  "
    "2     20  " "101.979  " "83,0  "  "54,8  "  "28,2  "  "814,2  "   "537,7  " "276,5  " "2     20  "
    "2     21  " "79.056  "  "60,1  "  "40,5  "  "19,6  "  "759,7  "   "512,3  " "247,4  " "2     21  "
    "2     22  " "91.469  "  "62,8  "  "40,5  "  "22,4  "  "686,9  "   "442,5  " "244,4  " "2     22  "
    "2     23  " "64.084  "  "48,5  "  "29,4  "  "19,1  "  "757,3  "   "459,0  " "298,3  " "2     23  "
    "2     24  " "64.335  "  "47,1  "  "32,1  "  "15,0  "  "731,8  "   "498,3  " "233,5  " "2     24  "
    "2     25  " "65.039  "  "50,3  "  "34,5  "  "15,8  "  "773,6  "   "531,1  " "242,5  " "2     25  "
    "2     26  " "137.299  " "102,5  " "66,6  "  "35,9  "  "746,7  "   "485,1  " "261,6  " "2     26  "
    "2     27  " "126.873  " "97,2  "  "67,0  "  "30,2  "  "766,1  "   "527,9  " "238,2  " "2     27  "
    "2     28  " "138.660  " "101,9  " "66,3  "  "35,6  "  "734,7  "   "478,2  " "256,5  " "2     28  "
    "2     29  " "65.019  "  "51,1  "  "36,0  "  "15,1  "  "785,4  "   "553,3  " "232,1  " "2     29  "
    "2     30  " "71.763  "  "55,7  "  "39,4  "  "16,3  "  "776,0  "   "548,5  " "227,5  " "2     30  "
    "2     31  " "76.017  "  "58,1  "  "35,8  "  "22,4  "  "764,7  "   "470,4  " "294,3  " "2     31  "
    "2     32  " "95.003  "  "73,7  "  "40,6  "  "33,1  "  "776,2  "   "427,4  " "348,8  " "2     32  "
    "2     33  " "63.474  "  "52,0  "  "35,0  "  "17,0  "  "819,2  "   "551,1  " "268,1  " "2     33  "
    "2     34  " "59.811  "  "46,0  "  "32,4  "  "13,5  "  "768,3  "   "542,1  " "226,2  " "2     34  "
    "2     35  " "79.540  "  "56,8  "  "39,5  "  "17,3  "  "713,6  "   "496,3  " "217,2  " "2     35  "
    "2     36  " "82.139  "  "61,4  "  "40,4  "  "21,0  "  "748,0  "   "492,3  " "255,7  " "2     36  "
    "2     37  " "75.499  "  "57,0  "  "35,2  "  "21,8  "  "755,3  "   "466,1  " "289,2  " "2     37  "
    "2     38  " "161.334  " "108,5  " "65,6  "  "42,9  "  "672,4  "   "406,5  " "265,9  " "2     38  "
    "2     39  " "90.714  "  "70,3  "  "50,3  "  "20,0  "  "774,8  "   "554,4  " "220,4  " "2     39  "
    "2     40  " "81.358  "  "62,4  "  "44,8  "  "17,5  "  "766,7  "   "551,1  " "215,6  " "2     40  "
    "2     41  " "119.183  " "90,1  "  "56,0  "  "34,1  "  "755,6  "   "469,5  " "286,1  " "2     41  "
    "2     42  " "114.942  " "88,5  "  "56,9  "  "31,6  "  "769,8  "   "495,2  " "274,6  " "2     42  "
    "2     43  " "95.566  "  "71,1  "  "45,5  "  "25,6  "  "743,5  "   "475,8  " "267,7  " "2     43  "
    "2     44  " "80.939  "  "63,4  "  "38,5  "  "24,9  "  "783,1  "   "475,6  " "307,5  " "2     44  "
    "2     45  " "72.098  "  "54,0  "  "35,7  "  "18,2  "  "748,4  "   "495,3  " "253,1  " "2     45  "
    "2     46  " "92.264  "  "71,0  "  "52,9  "  "18,1  "  "769,7  "   "573,3  " "196,3  " "2     46  "
    end

  • #2
    Falco:
    Code:
    . g alfa=strrtrim(dist)
    
    . split alfa
    variables created as string:
    alfa1  alfa2
    
    . list alfa*
    
         +--------------------------+
         |     alfa   alfa1   alfa2 |
         |--------------------------|
      1. |  1     1       1       1 |
      2. |  1     2       1       2 |
      3. |  1     3       1       3 |
      4. |  1     4       1       4 |
      5. |  1     5       1       5 |
         |--------------------------|
      6. |  1     6       1       6 |
      7. |  1     7       1       7 |
      8. |  1     8       1       8 |
      9. |  1     9       1       9 |
     10. | 1     10       1      10 |
         |--------------------------|
     11. | 1     11       1      11 |
     12. | 1     12       1      12 |
     13. | 1     13       1      13 |
     14. | 1     14       1      14 |
     15. | 1     15       1      15 |
         |--------------------------|
     16. | 1     16       1      16 |
     17. | 1     17       1      17 |
     18. | 1     18       1      18 |
     19. | 1     19       1      19 |
     20. | 1     20       1      20 |
         |--------------------------|
     21. | 1     21       1      21 |
     22. | 1     22       1      22 |
     23. | 1     23       1      23 |
     24. | 1     25       1      25 |
     25. | 1     26       1      26 |
         |--------------------------|
     26. | 1     27       1      27 |
     27. | 1     28       1      28 |
     28. | 1     29       1      29 |
     29. | 1     30       1      30 |
     30. | 1     31       1      31 |
         |--------------------------|
     31. | 1     32       1      32 |
     32. | 1     33       1      33 |
     33. | 1     34       1      34 |
     34. | 1     35       1      35 |
     35. | 1     36       1      36 |
         |--------------------------|
     36. | 1     37       1      37 |
     37. | 1     38       1      38 |
     38. | 1     39       1      39 |
     39. | 1     40       1      40 |
     40. | 1     41       1      41 |
         |--------------------------|
     41. | 1     42       1      42 |
     42. | 1     43       1      43 |
     43. | 1     44       1      44 |
     44. | 1     45       1      45 |
     45. | 1     46       1      46 |
         |--------------------------|
     46. | 1     47       1      47 |
     47. | 1     48       1      48 |
     48. | 1     49       1      49 |
     49. | 1     50       1      50 |
     50. | 1     51       1      51 |
         |--------------------------|
     51. | 1     52       1      52 |
     52. | 1     53       1      53 |
     53. | 1     54       1      54 |
     54. | 1     55       1      55 |
     55. |  2     1       2       1 |
         |--------------------------|
     56. |  2     2       2       2 |
     57. |  2     3       2       3 |
     58. |  2     4       2       4 |
     59. |  2     5       2       5 |
     60. |  2     6       2       6 |
         |--------------------------|
     61. |  2     7       2       7 |
     62. |  2     8       2       8 |
     63. |  2     9       2       9 |
     64. | 2     10       2      10 |
     65. | 2     11       2      11 |
         |--------------------------|
     66. | 2     12       2      12 |
     67. | 2     13       2      13 |
     68. | 2     14       2      14 |
     69. | 2     15       2      15 |
     70. | 2     16       2      16 |
         |--------------------------|
     71. | 2     17       2      17 |
     72. | 2     18       2      18 |
     73. | 2     19       2      19 |
     74. | 2     20       2      20 |
     75. | 2     21       2      21 |
         |--------------------------|
     76. | 2     22       2      22 |
     77. | 2     23       2      23 |
     78. | 2     24       2      24 |
     79. | 2     25       2      25 |
     80. | 2     26       2      26 |
         |--------------------------|
     81. | 2     27       2      27 |
     82. | 2     28       2      28 |
     83. | 2     29       2      29 |
     84. | 2     30       2      30 |
     85. | 2     31       2      31 |
         |--------------------------|
     86. | 2     32       2      32 |
     87. | 2     33       2      33 |
     88. | 2     34       2      34 |
     89. | 2     35       2      35 |
     90. | 2     36       2      36 |
         |--------------------------|
     91. | 2     37       2      37 |
     92. | 2     38       2      38 |
     93. | 2     39       2      39 |
     94. | 2     40       2      40 |
     95. | 2     41       2      41 |
         |--------------------------|
     96. | 2     42       2      42 |
     97. | 2     43       2      43 |
     98. | 2     44       2      44 |
     99. | 2     45       2      45 |
    100. | 2     46       2      46 |
         +--------------------------+
    
    .
    Kind regards,
    Carlo
    (StataNow 18.5)

    Comment


    • #3
      Dear Falco,

      You can try the following command (split)

      split dist, g(newvar) p(" ") l(2) destring

      I found that there are 5 spaces between the two numbers, so I use p("     ") <--- make sure you have 5 spaces between the quotes.

      For further info.
      help split

      Maybe other Statalisters have better solutions. Looking forward to their advice.

      Cheers

      Khairul Kamarudin



      Comment


      • #4
        Originally posted by Carlo Lazzaro View Post
        Falco:
        Code:
        . g alfa=strrtrim(dist)
        
        . split alfa
        variables created as string:
        alfa1 alfa2
        
        . list alfa*
        
        +--------------------------+
        | alfa alfa1 alfa2 |
        |--------------------------|
        1. | 1 1 1 1 |
        2. | 1 2 1 2 |
        3. | 1 3 1 3 |
        4. | 1 4 1 4 |
        5. | 1 5 1 5 |
        |--------------------------|
        6. | 1 6 1 6 |
        7. | 1 7 1 7 |
        8. | 1 8 1 8 |
        9. | 1 9 1 9 |
        10. | 1 10 1 10 |
        |--------------------------|
        11. | 1 11 1 11 |
        12. | 1 12 1 12 |
        13. | 1 13 1 13 |
        14. | 1 14 1 14 |
        15. | 1 15 1 15 |
        |--------------------------|
        16. | 1 16 1 16 |
        17. | 1 17 1 17 |
        18. | 1 18 1 18 |
        19. | 1 19 1 19 |
        20. | 1 20 1 20 |
        |--------------------------|
        21. | 1 21 1 21 |
        22. | 1 22 1 22 |
        23. | 1 23 1 23 |
        24. | 1 25 1 25 |
        25. | 1 26 1 26 |
        |--------------------------|
        26. | 1 27 1 27 |
        27. | 1 28 1 28 |
        28. | 1 29 1 29 |
        29. | 1 30 1 30 |
        30. | 1 31 1 31 |
        |--------------------------|
        31. | 1 32 1 32 |
        32. | 1 33 1 33 |
        33. | 1 34 1 34 |
        34. | 1 35 1 35 |
        35. | 1 36 1 36 |
        |--------------------------|
        36. | 1 37 1 37 |
        37. | 1 38 1 38 |
        38. | 1 39 1 39 |
        39. | 1 40 1 40 |
        40. | 1 41 1 41 |
        |--------------------------|
        41. | 1 42 1 42 |
        42. | 1 43 1 43 |
        43. | 1 44 1 44 |
        44. | 1 45 1 45 |
        45. | 1 46 1 46 |
        |--------------------------|
        46. | 1 47 1 47 |
        47. | 1 48 1 48 |
        48. | 1 49 1 49 |
        49. | 1 50 1 50 |
        50. | 1 51 1 51 |
        |--------------------------|
        51. | 1 52 1 52 |
        52. | 1 53 1 53 |
        53. | 1 54 1 54 |
        54. | 1 55 1 55 |
        55. | 2 1 2 1 |
        |--------------------------|
        56. | 2 2 2 2 |
        57. | 2 3 2 3 |
        58. | 2 4 2 4 |
        59. | 2 5 2 5 |
        60. | 2 6 2 6 |
        |--------------------------|
        61. | 2 7 2 7 |
        62. | 2 8 2 8 |
        63. | 2 9 2 9 |
        64. | 2 10 2 10 |
        65. | 2 11 2 11 |
        |--------------------------|
        66. | 2 12 2 12 |
        67. | 2 13 2 13 |
        68. | 2 14 2 14 |
        69. | 2 15 2 15 |
        70. | 2 16 2 16 |
        |--------------------------|
        71. | 2 17 2 17 |
        72. | 2 18 2 18 |
        73. | 2 19 2 19 |
        74. | 2 20 2 20 |
        75. | 2 21 2 21 |
        |--------------------------|
        76. | 2 22 2 22 |
        77. | 2 23 2 23 |
        78. | 2 24 2 24 |
        79. | 2 25 2 25 |
        80. | 2 26 2 26 |
        |--------------------------|
        81. | 2 27 2 27 |
        82. | 2 28 2 28 |
        83. | 2 29 2 29 |
        84. | 2 30 2 30 |
        85. | 2 31 2 31 |
        |--------------------------|
        86. | 2 32 2 32 |
        87. | 2 33 2 33 |
        88. | 2 34 2 34 |
        89. | 2 35 2 35 |
        90. | 2 36 2 36 |
        |--------------------------|
        91. | 2 37 2 37 |
        92. | 2 38 2 38 |
        93. | 2 39 2 39 |
        94. | 2 40 2 40 |
        95. | 2 41 2 41 |
        |--------------------------|
        96. | 2 42 2 42 |
        97. | 2 43 2 43 |
        98. | 2 44 2 44 |
        99. | 2 45 2 45 |
        100. | 2 46 2 46 |
        +--------------------------+
        
        .
        This is a better solution: strrtrim the spaces first.

        Comment


        • #5
          You also have issues with your other strings: dots (".") used as thousands separators and commas (",") used as decimal points.

          You could clean up all of this with:
          Code:
          ds, has(type string) 
          local strvars "`r(varlist)'"
          foreach var of local strvars {
              replace `var'=strtrim(`var')
              replace `var'=stritrim(`var')
              replace `var'=subinstr(`var',".","",.)    
          }
          split dist, g(newvar)
          destring, replace dpcomma

          Comment


          • #6
            This is just to underline that with split (please take this as a declaration of interest) multiple spaces matter no more than single spaces if parsing is on spaces.

            Code:
            clear 
            set obs 1
            generate str whatever = "frog       toad" in 1
            set obs 2
            replace whatever = "frog toad" in 2
            
            . split whatever
            variables created as string: 
            whatever1  whatever2
            
            . gen length1 = length(whatever1)
            
            . gen length2 = length(whatever2)
            
            . l
            
                 +-----------------------------------------------------------+
                 |        whatever   whatev~1   whatev~2   length1   length2 |
                 |-----------------------------------------------------------|
              1. | frog       toad       frog       toad         4         4 |
              2. |       frog toad       frog       toad         4         4 |
                 +-----------------------------------------------------------+

            Comment


            • #7
              Thank you all for your inputs.

              For some reason, it doesn't split my dist variable in two.

              I tried the code Carlo Lazzaro posted:
              g alfa=strrtrim(dist)
              split alfa
              However, this only produces the variable alfa1 shown below, and doesn't actually split my variable dist into two variables. Yet when I use the dataex example posted above, it works! My variable has no values other than

              1 1
              1 2
              .. ..
              .. ..
              10 237

              Below is what it produces: just an exact copy, and not an actual split.
              With Jorrit's code exactly the same thing happens: a copy of the variable to be split appears, but no split. But if I use the dataex version I sent, both codes work.

              Do any of you know what could be the problem?

              Code:
              * Example generated by -dataex-. To install: ssc install dataex
              clear
              input str105 dist str21 Superfície_total   str45 Valorcadastral str13 Valorsuelo str17 Valorconsserv str26 valorcadastral str13 valorsuelo str17 Valorconstruccion str19(alfa alfa1)
              "1     1  "  "281.185  " "260,5  " "142,6  " "117,9  " "926,4  " "507,2  " "419,2  " "1     1  "  "1     1  "
              "1     2  "  "105.262  " "53,3  "  "30,7  "  "21,0  "  "506,3  " "292,0  " "199,4  " "1     2  "  "1     2  "
              "1     3  "  "142.322  " "95,9  "  "48,4  "  "47,4  "  "673,8  " "339,7  " "332,9  " "1     3  "  "1     3  "
              "1     4  "  "210.574  " "121,4  " "57,5  "  "63,9  "  "576,6  " "273,1  " "303,5  " "1     4  "  "1     4  "
              "1     5  "  "227.173  " "137,0  " "65,9  "  "71,1  "  "603,2  " "290,1  " "313,1  " "1     5  "  "1     5  "
              "1     6  "  "130.012  " "63,6  "  "38,6  "  "25,1  "  "489,5  " "296,8  " "192,7  " "1     6  "  "1     6  "
              "1     7  "  "98.810  "  "56,5  "  "31,0  "  "25,5  "  "571,8  " "313,3  " "258,5  " "1     7  "  "1     7  "
              "1     8  "  "98.802  "  "44,4  "  "27,9  "  "16,6  "  "449,6  " "282,1  " "167,6  " "1     8  "  "1     8  "
              "1     9  "  "91.118  "  "45,8  "  "28,2  "  "17,6  "  "502,5  " "309,6  " "192,9  " "1     9  "  "1     9  "
              "1     10  " "94.147  "  "42,1  "  "26,1  "  "16,0  "  "446,7  " "277,2  " "169,5  " "1     10  " "1     10  "
              end
              Last edited by Falco Wolf; 21 Feb 2019, 01:05. Reason: Edited for further clarity.

              Comment


              • #8
                Falco, I did it as split dist, gen(new) destring

                Code:
                help split
                split dist, gen(new) destring
                /*
                variables born as string: 
                new1  new2
                new1: all characters numeric; replaced as byte
                new2: all characters numeric; replaced as byte
                */
                . desc dist new1 new2
                
                              storage   display    value
                variable name   type    format     label      variable label
                ---------------------------------------------------------------------------------------------------------------------------------------------
                dist            str105  %105s                 
                new1            byte    %10.0g                
                new2            byte    %10.0g                
                
                . list dist new1 new2 in 1/20, divider
                
                     +--------------------------+
                     |       dist | new1 | new2 |
                     |------------+------+------|
                  1. |  1     1   |    1 |    1 |
                  2. |  1     2   |    1 |    2 |
                  3. |  1     3   |    1 |    3 |
                  4. |  1     4   |    1 |    4 |
                  5. |  1     5   |    1 |    5 |
                     |------------+------+------|
                  6. |  1     6   |    1 |    6 |
                  7. |  1     7   |    1 |    7 |
                  8. |  1     8   |    1 |    8 |
                  9. |  1     9   |    1 |    9 |
                 10. | 1     10   |    1 |   10 |
                     |------------+------+------|
                 11. | 1     11   |    1 |   11 |
                 12. | 1     12   |    1 |   12 |
                 13. | 1     13   |    1 |   13 |
                 14. | 1     14   |    1 |   14 |
                 15. | 1     15   |    1 |   15 |
                     |------------+------+------|
                 16. | 1     16   |    1 |   16 |
                 17. | 1     17   |    1 |   17 |
                 18. | 1     18   |    1 |   18 |
                 19. | 1     19   |    1 |   19 |
                 20. | 1     20   |    1 |   20 |
                     +--------------------------+

                Comment


                • #9
                  Your data when copied and pasted into Statalist surely are rendered as space-separated; otherwise other people could hardly get the results we do. But if you can't replicate those results, my guess is that you have some other characters in there that look like spaces, say tabs or something more exotic.

                  See the simple charlist (SSC) or the much more versatile chartab (SSC) for tools for finding those characters.

                  https://www.statalist.org/forums/for...equency-counts explains the latter.
                  Last edited by Nick Cox; 21 Feb 2019, 02:21.

                  Comment


                  • #10
                    David, still there is no actual split in my dataset.

                    I have proceeded to use the chartab and this is what it returns:
                    . chartab dist
                    decimal hexadecimal character frequency unique name
                    48 \u0030 0 298 DIGIT ZERO
                    49 \u0031 1 633 DIGIT ONE
                    50 \u0032 2 415 DIGIT TWO
                    51 \u0033 3 353 DIGIT THREE
                    52 \u0034 4 280 DIGIT FOUR
                    53 \u0035 5 310 DIGIT FIVE
                    54 \u0036 6 284 DIGIT SIX
                    55 \u0037 7 311 DIGIT SEVEN
                    56 \u0038 8 300 DIGIT EIGHT
                    57 \u0039 9 261 DIGIT NINE
                    160 \u00a0 7,476 NO-BREAK SPACE
                    freq. count distinct
                    ASCII characters = 3,445 10
                    Multibyte UTF-8 characters = 7,476 1
                    Unicode replacement character = 0 0
                    Total Unicode characters = 10,921 11



                    So I understand I only have the digits 0-9 and spaces. So split should work, correct?

                    Comment


                    • #11
                      You can see a list of ASCII codes here (but any code <=31 is a non-printing code.) (Line Feed==10, Carriage Return==13).

                      William Lisowski has a clever regex method for cleaning strings that I've used below. His method uses a regular expression that contains a list of acceptable characters and deletes all others. But it requires Stata 14 or later. See here and here.

                      Code:
                      charlist dist
                      di r(ascii)  // shows you what ASCII codes are present
                      
                      * If dist should only contain numbers and spaces
                      generate str dist_clean =  ustrregexra(dist,"[^ 0-9]","")  // note the space between the "^" and the 0.
                      split dist_clean, gen(new) destring
                      
                      * I added some other characters to make sure the regex worked
                      list in 1/10, divide noobs
                      
                        +---------------------------------------+
                        |       dist | dist_clean | new1 | new2 |
                        |------------+------------+------+------|
                        |  1     1 A |   1     1  |    1 |    1 |
                        |   1     2B |    1     2 |    1 |    2 |
                        |   1     3; |    1     3 |    1 |    3 |
                        |  1     4   |  1     4   |    1 |    4 |
                        |  1     5   |  1     5   |    1 |    5 |
                        |------------+------------+------+------|
                        |  1     6   |  1     6   |    1 |    6 |
                        |  1     7   |  1     7   |    1 |    7 |
                        |  1     8   |  1     8   |    1 |    8 |
                        |  1     9   |  1     9   |    1 |    9 |
                        | 1     10   | 1     10   |    1 |   10 |
                        +---------------------------------------+



                      Comment


                      • #12
                        As surmised in #9 the problem lies in characters other than spaces.

                        The help for split has a detailed example for splitting on char(9) that will suggest code for splitting on char(160). char(160) is not the standard space. I can't see that regular expressions are needed.

                        Comment


                        • #13
                          If the spaces in all your other variables are these non-breaking spaces as well, you can fix those with:

                          Code:
                          * Clean every string variable, then split dist into parts and convert to numeric.
                          * ds leaves the names of all string variables in r(varlist).
                          ds, has(type string) 
                          local strvars "`r(varlist)'"
                          foreach var of local strvars {
                              * ustrtrim() strips leading and trailing Unicode whitespace.
                              replace `var'=ustrtrim(`var')
                              * stritrim() collapses runs of interior blanks to a single blank.
                              * NOTE(review): there is no Unicode-aware counterpart, so interior
                              * non-breaking spaces are presumably left untouched -- verify.
                              replace `var'=stritrim(`var')
                              * Remove every "."; presumably these are thousands separators,
                              * given that dpcomma is used below -- confirm against the data.
                              replace `var'=subinstr(`var',".","",.)    
                          }
                          * split parses on spaces by default and creates newvar1, newvar2, ...
                          split dist, g(newvar)
                          * Convert string variables to numeric where possible, reading "," as the decimal mark.
                          destring, replace dpcomma
                          Note the u in ustrtrim(), which stands for Unicode.
                          Unfortunately, there is no ustritrim function. Not sure why that is, would have been helpful here.

                          Comment


                          • #14
                            Demonstration:

                            Code:
                            . clear
                            
                            . set obs 1
                            number of observations (_N) was 0, now 1
                            
                            . gen whatever = "1" + char(160) + "2"
                            
                            . split whatever
                            variable created as string: 
                            whatever1
                            
                            . split whatever, parse(`=char(160)') gen(tryagain)
                            variables created as string: 
                            tryagain1  tryagain2
                            
                            . l
                            
                                 +-------------------------------------------+
                                 | whatever   whatev~1   tryaga~1   tryaga~2 |
                                 |-------------------------------------------|
                              1. |      1�2        1�2          1          2 |
                                 +-------------------------------------------+

                            Comment


                            • #15
                              None of the resolutions seem to be working. I tried using split with the parse() option, but the results have always been either missing values or what I have below (which is from Benson's code).

                              I dropped the first 100 observations, so you can have character 160 in the dataex below.

                              Is there a way I could upload the .dta file here?

                              Code:
                              * Example generated by -dataex-. To install: ssc install dataex
                              clear
                              input str105 dist int new1 str5 dist_clean int aa1
                              "2     47"   247 "247"   247
                              "2     48"   248 "248"   248
                              "2     49"   249 "249"   249
                              "2     50"   250 "250"   250
                              "2     51"   251 "251"   251
                              "2     52"   252 "252"   252
                              "2     53"   253 "253"   253
                              "2     54"   254 "254"   254
                              "2     55"   255 "255"   255
                              "2     56"   256 "256"   256
                              "2     57"   257 "257"   257
                              "2     58"   258 "258"   258
                              "2     59"   259 "259"   259
                              "2     60"   260 "260"   260
                              "2     61"   261 "261"   261
                              "2     62"   262 "262"   262
                              "2     63"   263 "263"   263
                              "2     64"   264 "264"   264
                              "2     65"   265 "265"   265
                              "2     66"   266 "266"   266
                              "2     67"   267 "267"   267
                              "2     68"   268 "268"   268
                              "2     69"   269 "269"   269
                              "2     70"   270 "270"   270
                              "2     71"   271 "271"   271
                              "2     72"   272 "272"   272
                              "2     73"   273 "273"   273
                              "2     74"   274 "274"   274
                              "2     75"   275 "275"   275
                              "2     76"   276 "276"   276
                              "2     77"   277 "277"   277
                              "2     78"   278 "278"   278
                              "2     79"   279 "279"   279
                              "2     80"   280 "280"   280
                              "2     81"   281 "281"   281
                              "2     82"   282 "282"   282
                              "2     83"   283 "283"   283
                              "2     84"   284 "284"   284
                              "2     85"   285 "285"   285
                              "2     86"   286 "286"   286
                              "2     87"   287 "287"   287
                              "2     88"   288 "288"   288
                              "2     89"   289 "289"   289
                              "2     90"   290 "290"   290
                              "2     91"   291 "291"   291
                              "2     92"   292 "292"   292
                              "2     93"   293 "293"   293
                              "2     94"   294 "294"   294
                              "2     95"   295 "295"   295
                              "2     96"   296 "296"   296
                              "2     97"   297 "297"   297
                              "2     98"   298 "298"   298
                              "2     99"   299 "299"   299
                              "2     100" 2100 "2100" 2100
                              "2     101" 2101 "2101" 2101
                              "2     102" 2102 "2102" 2102
                              "2     103" 2103 "2103" 2103
                              "2     104" 2104 "2104" 2104
                              "2     105" 2105 "2105" 2105
                              "2     106" 2106 "2106" 2106
                              "2     107" 2107 "2107" 2107
                              "2     108" 2108 "2108" 2108
                              "2     109" 2109 "2109" 2109
                              "2     110" 2110 "2110" 2110
                              "2     111" 2111 "2111" 2111
                              "2     112" 2112 "2112" 2112
                              "2     113" 2113 "2113" 2113
                              "2     114" 2114 "2114" 2114
                              "2     115" 2115 "2115" 2115
                              "2     116" 2116 "2116" 2116
                              "2     117" 2117 "2117" 2117
                              "2     118" 2118 "2118" 2118
                              "2     119" 2119 "2119" 2119
                              "2     120" 2120 "2120" 2120
                              "2     121" 2121 "2121" 2121
                              "2     122" 2122 "2122" 2122
                              "2     123" 2123 "2123" 2123
                              "2     124" 2124 "2124" 2124
                              "2     125" 2125 "2125" 2125
                              "2     126" 2126 "2126" 2126
                              "2     127" 2127 "2127" 2127
                              "2     128" 2128 "2128" 2128
                              "2     129" 2129 "2129" 2129
                              "2     130" 2130 "2130" 2130
                              "2     131" 2131 "2131" 2131
                              "2     132" 2132 "2132" 2132
                              "2     133" 2133 "2133" 2133
                              "2     134" 2134 "2134" 2134
                              "2     135" 2135 "2135" 2135
                              "2     136" 2136 "2136" 2136
                              "2     137" 2137 "2137" 2137
                              "2     138" 2138 "2138" 2138
                              "2     139" 2139 "2139" 2139
                              "2     140" 2140 "2140" 2140
                              "2     141" 2141 "2141" 2141
                              "2     142" 2142 "2142" 2142
                              "2     143" 2143 "2143" 2143
                              "2     144" 2144 "2144" 2144
                              "2     145" 2145 "2145" 2145
                              "2     146" 2146 "2146" 2146
                              end

                              Comment

                              Working...
                              X