Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Assigning the same unique ID to some values that share the same set of values

    Hello,

    My goal is to assign the same unique ID to "agroup" and "aiso3c" values that share the same set of "id" and "prope". This is the dataset I have:

    Code:
    * Example generated by -dataex-. For more info, type help dataex
    clear
    input str3 aiso3c str40 agroup str7 id str18 prope
    "MLI" "Arabe"               "mey-MLI" "0.0277777777777778"
    "MLI" "Arabic"              "mey-MLI" "0.0277777777777778"
    "MLI" "Bambara"             "bam-MLI" "1"                 
    "MLI" "Bella"               "taq-MLI" "0.25"              
    "MLI" "Bella"               "ttq-MLI" "0.25"              
    "MLI" "Bobo"                "bmq-MLI" "1"                 
    "MLI" "Bozo"                "boo-MLI" "0.142857142857143" 
    "MLI" "Bozo"                "boz-MLI" "0.142857142857143" 
    "MLI" "Bozo"                "bzx-MLI" "0.142857142857143" 
    "MLI" "Bozo"                "bze-MLI" "0.571428571428571" 
    "MLI" "Dafing"              "rkm-MLI" "1"                 
    "MLI" "Dogon"               "dkl-MLI" "0.0869565217391304"
    "MLI" "Dogon"               "djm-MLI" "0.173913043478261" 
    "MLI" "Dogon"               "dbg-MLI" "0.0434782608695652"
    "MLI" "Dogon"               "dds-MLI" "0.0869565217391304"
    "MLI" "Dogon"               "dtm-MLI" "0.0434782608695652"
    "MLI" "Dogon"               "dts-MLI" "0.0434782608695652"
    "MLI" "Dogon"               "dtt-MLI" "0.0434782608695652"
    "MLI" "Dogon"               "dtk-MLI" "0.173913043478261" 
    "MLI" "Dogon"               "dbu-MLI" "0.0869565217391304"
    "MLI" "French"              "NA"      "NA"                
    "MLI" "Fulfulde/Peul"       "ffm-MLI" "0.0833333333333333"
    "MLI" "Fulfulde/Peul"       "fuf-MLI" "0.0833333333333333"
    "MLI" "Fulfulde/Peul"       "fuc-MLI" "0.0833333333333333"
    "MLI" "Haoussa"             "NA"      "NA"                
    "MLI" "Kakolo"              "xkg-MLI" "1"                 
    "MLI" "Khasonk"             "kao-MLI" "1"                 
    "MLI" "Khasonke"            "kao-MLI" "1"                 
    "MLI" "Khassonke"           "kao-MLI" "1"                 
    "MLI" "Malink"              "bam-MLI" "0.16"              
    "MLI" "Malink"              "mwk-MLI" "0.02"              
    "MLI" "Malink"              "rkm-MLI" "0.06"              
    "MLI" "Malink"              "jad-MLI" "0.02"              
    "MLI" "Malink"              "mlq-MLI" "0.02"              
    "MLI" "Malink"              "kao-MLI" "0.02"              
    "MLI" "Malink"              "xkg-MLI" "0.02"              
    "MLI" "Malinke"             "bam-MLI" "0.16"              
    "MLI" "Malinke"             "mwk-MLI" "0.02"              
    "MLI" "Malinke"             "rkm-MLI" "0.06"              
    "MLI" "Malinke"             "jad-MLI" "0.02"              
    "MLI" "Malinke"             "mlq-MLI" "0.02"              
    "MLI" "Malinke"             "kao-MLI" "0.02"              
    "MLI" "Malinke"             "xkg-MLI" "0.02"              
    "MLI" "Maure"               "mey-MLI" "1"                 
    "MLI" "Mianka"              "myk-MLI" "1"                 
    "MLI" "Mossi"               "NA"      "NA"                
    "MLI" "Peugl/Fulfulde"      "ffm-MLI" "0.0833333333333333"
    "MLI" "Peugl/Fulfulde"      "fuf-MLI" "0.0833333333333333"
    "MLI" "Peugl/Fulfulde"      "fuc-MLI" "0.0833333333333333"
    "MLI" "Peuhl"               "ffm-MLI" "0.0833333333333333"
    "MLI" "Peuhl"               "fuf-MLI" "0.0833333333333333"
    "MLI" "Peuhl"               "fuc-MLI" "0.0833333333333333"
    "MLI" "Peulh / Fulfude"     "ffm-MLI" "0.0833333333333333"
    "MLI" "Peulh / Fulfude"     "fuf-MLI" "0.0833333333333333"
    "MLI" "Peulh / Fulfude"     "fuc-MLI" "0.0833333333333333"
    "MLI" "Peulh/ Fulfulde"     "ffm-MLI" "0.0833333333333333"
    "MLI" "Peulh/ Fulfulde"     "fuf-MLI" "0.0833333333333333"
    "MLI" "Peulh/ Fulfulde"     "fuc-MLI" "0.0833333333333333"
    "MLI" "Peulh/Fulfude"       "ffm-MLI" "0.0833333333333333"
    "MLI" "Peulh/Fulfude"       "fuf-MLI" "0.0833333333333333"
    "MLI" "Peulh/Fulfude"       "fuc-MLI" "0.0833333333333333"
    "MLI" "Samogo"              "bxw-MLI" "0.142857142857143" 
    "MLI" "Samogo"              "dux-MLI" "0.142857142857143" 
    "MLI" "Samogo"              "jow-MLI" "0.142857142857143" 
    "MLI" "Samoko"              "bxw-MLI" "0.142857142857143" 
    "MLI" "Samoko"              "dux-MLI" "0.142857142857143" 
    "MLI" "Samoko"              "jow-MLI" "0.142857142857143" 
    "MLI" "Senufo"              "myk-MLI" "0.160714285714286" 
    "MLI" "Senufo"              "spp-MLI" "0.0714285714285714"
    "MLI" "Senufo"              "sep-MLI" "0.0178571428571429"
    "MLI" "Senufo"              "shz-MLI" "0.0178571428571429"
    "MLI" "Senufo/ Mianka"      "myk-MLI" "0.160714285714286" 
    "MLI" "Senufo/ Mianka"      "spp-MLI" "0.0714285714285714"
    "MLI" "Senufo/ Mianka"      "sep-MLI" "0.0178571428571429"
    "MLI" "Senufo/ Mianka"      "shz-MLI" "0.0178571428571429"
    "MLI" "Sonink"              "snk-MLI" "1"                 
    "MLI" "Soninke"             "snk-MLI" "1"                 
    "MLI" "Soninke / Sarakolle" "snk-MLI" "1"                 
    "MLI" "Soninke/ Sarakoll"   "snk-MLI" "1"                 
    "MLI" "Soninke/Sarakole"    "snk-MLI" "1"                 
    "MLI" "Sonrha"              "khq-MLI" "0.2"               
    "MLI" "Sonrha"              "ses-MLI" "0.2"               
    "MLI" "Sonrha"              "dje-MLI" "0.1"               
    "MLI" "Sonrha"              "hmb-MLI" "0.1"               
    "MLI" "Sonrha"              "dsq-MLI" "0.1"               
    "MLI" "Sonrhai"             "khq-MLI" "0.2"               
    "MLI" "Sonrhai"             "ses-MLI" "0.2"               
    "MLI" "Sonrhai"             "dje-MLI" "0.1"               
    "MLI" "Sonrhai"             "hmb-MLI" "0.1"               
    "MLI" "Sonrhai"             "dsq-MLI" "0.1"               
    "MLI" "Tamasheq"            "taq-MLI" "0.25"              
    "MLI" "Tamasheq"            "ttq-MLI" "0.25"              
    "MLI" "Wolof"               "NA"      "NA"                
    "MLI" "Zerma"               "dje-MLI" "1"                 
    end
    I am going crazy. This is an example of the type of code I have in mind:

    Code:
    * Order the rows so that each (aiso3c, agroup) block lists its id/prope pairs consecutively.
    sort aiso3c agroup id prope
    * Paste id and prope together into a single string key per observation.
    egen bg_ei = concat(id prope)
    sort aiso3c agroup bg_ei
    * NOTE(review): group() is called through -generate- here; reply #2 in this thread points out that egen functions cannot be accessed using gen, which is the likely reason this snippet fails.
    by aiso3c agroup: gen group_summary = group(bg_ei)
    * Flags the first observation of each group_summary value with 1 (0 otherwise); this is a tag, not an identifier.
    bysort group_summary: gen unique_id = _n == 1
    However, it does not work. Any help? Thank you!

  • #2
    There is a group() function in Stata but it's long since undocumented and it doesn't do what you want. I assume that you wanted either the group() function or the tag() function of egen -- noting that egen functions can't be accessed using gen (for generate).

    Code:
    * Variables whose joint values define a group; kept in one macro so the three commands stay in sync.
    local keys aiso3c agroup id prope

    * Integer code shared by observations that are identical on every key variable.
    egen group_id = group(`keys')
    * 1 for exactly one observation per distinct combination, 0 otherwise.
    egen group_tag = tag(`keys')

    * Show one representative row per group.
    list group_id `keys' if group_tag
    The result is not shown here and may not be what you really want, but it's what you seem to be asking for.

    Comment


    • #3
      Diego:
      I am not clear what you're after so please consider what follows as a tentative reply:
      Code:
      * One integer code per distinct (id, prope) combination; aiso3c and agroup are not used.
      . egen pre=group( id prope )
      
      * Bring observations with the same code together.
      . sort pre
      Kind regards,
      Carlo
      (StataNow 18.5)

      Comment


      • #4
        Dear Nick Cox and Carlo Lazzaro,

        Thank you for your replies. I tried your suggestions, but they do not generate what I am asking for. Maybe I did not explain it clearly.

        In my data: the set of values "MLI" and "Peugl/Fulfulde", and "MLI" and "Peuhl" shared the same set of values by id and prope, that is, both of them share ffm-MLI, fuf-MLI, and fuc-MLI as well as the same values in prope.

        Then, what I am looking for is that, "MLI" and "Peugl/Fulfulde", and "MLI" and "Peuhl" have the same unique ID. I hope it is clear now. I am breaking my head trying to get that.

        Comment


        • #5
          I think I understand what you want and my code shows you how to do it and @Carlo Lazzaro's suggestion is in the same spirit.

          egen, group() will map several observations to the same integer identifier if, and only if, observations are identical on all variables named.

          Your data example has many similarities but no two observations are identical on all 4 variables.

          Comment


          • #6
            Thank you, Nick Cox, for the answer.

            I still think there is a bit of a misunderstanding. I do not really want observations that are identical on all variables named; in that case, the code you suggested would work. The examples I gave in my comment are not identical on all variables named, but I want to force them to have the same ID because they share the same set of values in id and prope.
            Last edited by Diego Malo; 10 Jun 2024, 03:29.

            Comment


            • #7
              Understood now, I think. In that case values of agroup and aiso3c are irrelevant -- so why mention them at all? -- and the code of Carlo Lazzaro appears to be what you seek.
              Last edited by Nick Cox; 10 Jun 2024, 04:23.

              Comment


              • #8
                Thank you again for your answer Nick Cox . Greatly appreciated.

                The code of Carlo Lazzaro is not working because it gives a different ID to observations of the same "agroup" and "aiso3c" that share the same set of values of id and prope. For example, all observations of "Peugl/Fulfulde" should have the same ID, but with Carlo's command "Peugl/Fulfulde" gets a different ID depending on the values of prope and id.

                I need to assign the same ID to combinations of "agroup" and "aiso3c" that share the same set of "prope" and "id" values. I am not able to explain it better, so I have generated manually the variable I want "unique_id" in excel to try make things clearer and after I have imported it to stata:


                Code:
                * Example generated by -dataex-. For more info, type help dataex
                clear
                input str3 aiso3c str19 agroup str7 bgroup str17 eifraca int pre byte unique_id
                "MLI" "Khasonke"            "kao-MLI" "1"                 458  1
                "MLI" "Khassonke"           "kao-MLI" "1"                 458  1
                "MLI" "Malink"              "bam-MLI" ".16"               430  2
                "MLI" "Malink"              "mwk-MLI" ".02"               463  2
                "MLI" "Malink"              "rkm-MLI" ".06"               466  2
                "MLI" "Malink"              "jad-MLI" ".02"               455  2
                "MLI" "Malink"              "mlq-MLI" ".02"               462  2
                "MLI" "Malink"              "kao-MLI" ".02"               457  2
                "MLI" "Malink"              "xkg-MLI" ".02"               475  2
                "MLI" "Malinke"             "bam-MLI" ".16"               430  2
                "MLI" "Malinke"             "mwk-MLI" ".02"               463  2
                "MLI" "Malinke"             "rkm-MLI" ".06"               466  2
                "MLI" "Malinke"             "jad-MLI" ".02"               455  2
                "MLI" "Malinke"             "mlq-MLI" ".02"               462  2
                "MLI" "Malinke"             "kao-MLI" ".02"               457  2
                "MLI" "Malinke"             "xkg-MLI" ".02"               475  2
                "MLI" "Maure"               "mey-MLI" "1"                 461  3
                "MLI" "Mianka"              "myk-MLI" "1"                 465  3
                "MLI" "Mossi"               "NA"      "NA"                429  4
                "MLI" "Peugl/Fulfulde"      "ffm-MLI" ".0833333333333333" 451  5
                "MLI" "Peugl/Fulfulde"      "fuf-MLI" ".0833333333333333" 453  5
                "MLI" "Peugl/Fulfulde"      "fuc-MLI" ".0833333333333333" 452  5
                "MLI" "Peuhl"               "ffm-MLI" ".0833333333333333" 451  5
                "MLI" "Peuhl"               "fuf-MLI" ".0833333333333333" 453  5
                "MLI" "Peuhl"               "fuc-MLI" ".0833333333333333" 452  5
                "MLI" "Peulh / Fulfude"     "ffm-MLI" ".0833333333333333" 451  5
                "MLI" "Peulh / Fulfude"     "fuf-MLI" ".0833333333333333" 453  5
                "MLI" "Peulh / Fulfude"     "fuc-MLI" ".0833333333333333" 452  5
                "MLI" "Peulh/ Fulfulde"     "ffm-MLI" ".0833333333333333" 451  5
                "MLI" "Peulh/ Fulfulde"     "fuf-MLI" ".0833333333333333" 453  5
                "MLI" "Peulh/ Fulfulde"     "fuc-MLI" ".0833333333333333" 452  5
                "MLI" "Peulh/Fulfude"       "ffm-MLI" ".0833333333333333" 451  5
                "MLI" "Peulh/Fulfude"       "fuf-MLI" ".0833333333333333" 453  5
                "MLI" "Peulh/Fulfude"       "fuc-MLI" ".0833333333333333" 452  5
                "MLI" "Samogo"              "bxw-MLI" ".142857142857143"  435  6
                "MLI" "Samogo"              "dux-MLI" ".142857142857143"  450  6
                "MLI" "Samogo"              "jow-MLI" ".142857142857143"  456  6
                "MLI" "Samoko"              "bxw-MLI" ".142857142857143"  435  6
                "MLI" "Samoko"              "dux-MLI" ".142857142857143"  450  6
                "MLI" "Samoko"              "jow-MLI" ".142857142857143"  456  6
                "MLI" "Senufo"              "myk-MLI" ".160714285714286"  464  7
                "MLI" "Senufo"              "spp-MLI" ".0714285714285714" 472  7
                "MLI" "Senufo"              "sep-MLI" ".0178571428571429" 468  7
                "MLI" "Senufo"              "shz-MLI" ".0178571428571429" 470  7
                "MLI" "Senufo/ Mianka"      "myk-MLI" ".160714285714286"  464  7
                "MLI" "Senufo/ Mianka"      "spp-MLI" ".0714285714285714" 472  7
                "MLI" "Senufo/ Mianka"      "sep-MLI" ".0178571428571429" 468  7
                "MLI" "Senufo/ Mianka"      "shz-MLI" ".0178571428571429" 470  7
                "MLI" "Sonink"              "snk-MLI" "1"                 471  8
                "MLI" "Soninke"             "snk-MLI" "1"                 471  8
                "MLI" "Soninke / Sarakolle" "snk-MLI" "1"                 471  8
                "MLI" "Soninke/ Sarakoll"   "snk-MLI" "1"                 471  8
                "MLI" "Soninke/Sarakole"    "snk-MLI" "1"                 471  8
                "MLI" "Sonrha"              "khq-MLI" ".2"                459  9
                "MLI" "Sonrha"              "ses-MLI" ".2"                469  9
                "MLI" "Sonrha"              "dje-MLI" ".1"                441  9
                "MLI" "Sonrha"              "hmb-MLI" ".1"                454  9
                "MLI" "Sonrha"              "dsq-MLI" ".1"                445  9
                "MLI" "Sonrhai"             "khq-MLI" ".2"                459  9
                "MLI" "Sonrhai"             "ses-MLI" ".2"                469  9
                "MLI" "Sonrhai"             "dje-MLI" ".1"                441  9
                "MLI" "Sonrhai"             "hmb-MLI" ".1"                454  9
                "MLI" "Sonrhai"             "dsq-MLI" ".1"                445  9
                "BWA" "Peulh/Fulfude"       "ffm-MLI" ".0833333333333333" 451 10
                "BWA" "Peulh/Fulfude"       "fuf-MLI" ".0833333333333333" 453 10
                "BWA" "Peulh/Fulfude"       "fuc-MLI" ".0833333333333333" 452 10
                "GHA" "Khasonke"            "kao-MLI" "1"                 458 11
                end


                I am exploring also the option of collapse but I do not get the results I want.





                Last edited by Diego Malo; 10 Jun 2024, 06:33.

                Comment


                • #9
                  Sorry, but I don't understand this either.

                  I didn't get further than the first two observations in #8 which have different values of agroup but you want to assign them the same identifier. Khasonke and Khassonke are different string values to Stata, regardless of whether you can recognise different spellings of the same place or category -- if that is what is going on. Similarly Malink and Malinke are different, and I can't follow why Maure and Mianka are regarded as the same. Yet more puzzles below.

                  This seems to be going in circles. Sorry that I can't spend more time on this, but I am getting more confused and my suggestions don't seem to be helping you.

                  Comment


                  • #10
                    Yes, I completely understand, it is a bit confusing.

                    Khasonke and Khassonke have the same identifier because they have the same values in columns id and prope (kao-MLI and 1). The same reason applies to Malink and Malinke: both of them have the same set of values in columns id and prope across their seven observations (bam-MLI, mwk-MLI, etc.). With respect to Maure and Mianka, you are right, they should not have the same unique identifier, as they do not share the same set of values of id and prope (mey-MLI and myk-MLI). I have updated it:


                    Code:
                    * Example generated by -dataex-. For more info, type help dataex
                    clear
                    input str3 aiso3c str19 agroup str7 id str17 prope byte unique_id
                    "MLI" "Khasonke"            "kao-MLI" "1"                  1
                    "MLI" "Khassonke"           "kao-MLI" "1"                  1
                    "MLI" "Malink"              "bam-MLI" ".16"                2
                    "MLI" "Malink"              "mwk-MLI" ".02"                2
                    "MLI" "Malink"              "rkm-MLI" ".06"                2
                    "MLI" "Malink"              "jad-MLI" ".02"                2
                    "MLI" "Malink"              "mlq-MLI" ".02"                2
                    "MLI" "Malink"              "kao-MLI" ".02"                2
                    "MLI" "Malink"              "xkg-MLI" ".02"                2
                    "MLI" "Malinke"             "bam-MLI" ".16"                2
                    "MLI" "Malinke"             "mwk-MLI" ".02"                2
                    "MLI" "Malinke"             "rkm-MLI" ".06"                2
                    "MLI" "Malinke"             "jad-MLI" ".02"                2
                    "MLI" "Malinke"             "mlq-MLI" ".02"                2
                    "MLI" "Malinke"             "kao-MLI" ".02"                2
                    "MLI" "Malinke"             "xkg-MLI" ".02"                2
                    "MLI" "Maure"               "mey-MLI" "1"                  3
                    "MLI" "Mianka"              "myk-MLI" "1"                  4
                    "MLI" "Mossi"               "NA"      "NA"                 5
                    "MLI" "Peugl/Fulfulde"      "ffm-MLI" ".0833333333333333"  6
                    "MLI" "Peugl/Fulfulde"      "fuf-MLI" ".0833333333333333"  6
                    "MLI" "Peugl/Fulfulde"      "fuc-MLI" ".0833333333333333"  6
                    "MLI" "Peuhl"               "ffm-MLI" ".0833333333333333"  6
                    "MLI" "Peuhl"               "fuf-MLI" ".0833333333333333"  6
                    "MLI" "Peuhl"               "fuc-MLI" ".0833333333333333"  6
                    "MLI" "Peulh / Fulfude"     "ffm-MLI" ".0833333333333333"  6
                    "MLI" "Peulh / Fulfude"     "fuf-MLI" ".0833333333333333"  6
                    "MLI" "Peulh / Fulfude"     "fuc-MLI" ".0833333333333333"  6
                    "MLI" "Peulh/ Fulfulde"     "ffm-MLI" ".0833333333333333"  6
                    "MLI" "Peulh/ Fulfulde"     "fuf-MLI" ".0833333333333333"  6
                    "MLI" "Peulh/ Fulfulde"     "fuc-MLI" ".0833333333333333"  6
                    "MLI" "Peulh/Fulfude"       "ffm-MLI" ".0833333333333333"  6
                    "MLI" "Peulh/Fulfude"       "fuf-MLI" ".0833333333333333"  6
                    "MLI" "Peulh/Fulfude"       "fuc-MLI" ".0833333333333333"  6
                    "MLI" "Samogo"              "bxw-MLI" ".142857142857143"   7
                    "MLI" "Samogo"              "dux-MLI" ".142857142857143"   7
                    "MLI" "Samogo"              "jow-MLI" ".142857142857143"   7
                    "MLI" "Samoko"              "bxw-MLI" ".142857142857143"   7
                    "MLI" "Samoko"              "dux-MLI" ".142857142857143"   7
                    "MLI" "Samoko"              "jow-MLI" ".142857142857143"   7
                    "MLI" "Senufo"              "myk-MLI" ".160714285714286"   8
                    "MLI" "Senufo"              "spp-MLI" ".0714285714285714"  8
                    "MLI" "Senufo"              "sep-MLI" ".0178571428571429"  8
                    "MLI" "Senufo"              "shz-MLI" ".0178571428571429"  8
                    "MLI" "Senufo/ Mianka"      "myk-MLI" ".160714285714286"   8
                    "MLI" "Senufo/ Mianka"      "spp-MLI" ".0714285714285714"  8
                    "MLI" "Senufo/ Mianka"      "sep-MLI" ".0178571428571429"  8
                    "MLI" "Senufo/ Mianka"      "shz-MLI" ".0178571428571429"  8
                    "MLI" "Sonink"              "snk-MLI" "1"                  9
                    "MLI" "Soninke"             "snk-MLI" "1"                  9
                    "MLI" "Soninke / Sarakolle" "snk-MLI" "1"                  9
                    "MLI" "Soninke/ Sarakoll"   "snk-MLI" "1"                  9
                    "MLI" "Soninke/Sarakole"    "snk-MLI" "1"                  9
                    "MLI" "Sonrha"              "khq-MLI" ".2"                10
                    "MLI" "Sonrha"              "ses-MLI" ".2"                10
                    "MLI" "Sonrha"              "dje-MLI" ".1"                10
                    "MLI" "Sonrha"              "hmb-MLI" ".1"                10
                    "MLI" "Sonrha"              "dsq-MLI" ".1"                10
                    "MLI" "Sonrhai"             "khq-MLI" ".2"                10
                    "MLI" "Sonrhai"             "ses-MLI" ".2"                10
                    "MLI" "Sonrhai"             "dje-MLI" ".1"                10
                    "MLI" "Sonrhai"             "hmb-MLI" ".1"                10
                    "MLI" "Sonrhai"             "dsq-MLI" ".1"                10
                    "BWA" "Peulh/Fulfude"       "ffm-MLI" ".0833333333333333" 11
                    "BWA" "Peulh/Fulfude"       "fuf-MLI" ".0833333333333333" 11
                    "BWA" "Peulh/Fulfude"       "fuc-MLI" ".0833333333333333" 11
                    "GHA" "Khasonke"            "kao-MLI" "1"                 12
                    end
                    Last edited by Diego Malo; 10 Jun 2024, 07:44.

                    Comment


                    • #11
                      I found the solution!

                      Code:
                      * Goal: give the same unique_id to every (aiso3c, agroup) whose complete set of
                      * (id, prope) pairs is the same within a country.
                      *
                      * Row-level code for each distinct (aiso3c, prope, id) combination (kept for reference).
                      egen group_id = group(aiso3c prope id)

                      * Summing group_id within (aiso3c, agroup) is not a safe identifier: different sets
                      * of codes can add up to the same total (e.g. {1,4} and {2,3} both give 5).
                      * Instead, build a signature string listing every id:prope pair of the group in
                      * sorted order, so that two groups get the same signature only when their sets match.
                      sort aiso3c agroup id prope
                      by aiso3c agroup: gen signature = id + ":" + prope if _n == 1
                      * replace works through observations in order, so signature[_n-1] already holds the running list.
                      by aiso3c agroup: replace signature = signature[_n-1] + "|" + id + ":" + prope if _n > 1
                      * Copy the completed list (held by the last row) to every row of the group.
                      by aiso3c agroup: replace signature = signature[_N]

                      * One integer per distinct (country, signature) combination.
                      egen unique_id = group(aiso3c signature)

                      Comment

                      Working...
                      X