Announcement

Collapse
No announcement yet.
X
  • Filter
  • Time
  • Show
Clear All
new posts

  • Issue with collapse command

    I need your assistance, please. I am having an error while trying to use the collapse command.
    Code:
    . collapse (sum) Tx=failure (count) Nx=spell_interval, by(spell_interval)  // Group by spell_interval
    failed to setup collapse problem
    Below is my data and the full code. My aim is to estimate the contraceptive continuation rate using this method as explained here: Contraceptive continuation rates - DataForImpactProject

    Data:
    Code:
    * Example generated by -dataex-. For more info, type help dataex
    clear
    input str13 uniquegirlid byte(q407_mth1 q407_mth10 q407_mth11 q407_mth12 q407_mth2 q407_mth3 q407_mth4 q407_mth5 q407_mth6 q407_mth7 q407_mth8 q407_mth9)
    "2128_10_2_2"  6 17 17 17 17 17 17 17 17 17 17 17
    "2128_16_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2128_19_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2128_20_1_1"  4  5  5  5  4  4  5  5  5  5  5  5
    "2128_21_1_1"  0  4  4  4  0  0  4  4  4  4  4  4
    "2128_14_3_3"  5  5  5  5  5  5  5  5  5  5  5  5
    "2128_3_2_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2128_19_1_2"  4  4  4  4  4  4  4  4  4  4  4  4
    "2128_24_1_1"  6  6  6  6  6  6  6  6  6  6  6  6
    "2128_13_2_2" 11  4  4  4  4  4  4  4  4  4  4  4
    "2128_14_3_2"  6  6  6  6  6  6  6  6  6  6  6  6
    "2128_11_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2129_14_8_2"  4 17 17 17  4  0 17 17 17 17 17 17
    "2129_5_3_2"   0  6  6  6  0  0  0  0  6  6  6  6
    "2129_10_2_2" 17  5  5  5 17 17 17 17 17 16  5  5
    "2129_15_3_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2129_1_1_1"  17  5  5  5 17 17 17 17 16  5  5  5
    "2129_5_3_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2129_10_2_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2129_16_3_2" 16  5  5  5 11 11  5  5  5  5  5  5
    "2129_4_2_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2129_16_3_1"  6 17 17 17  6  6  6  0  0 17 17 17
    "2129_9_4_2"   5  5  5  5  5  5  5  5  5  5  5  5
    "2129_9_4_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2130_10_1_1"  0  0 18  6  0  0  0  0  0  0  0  0
    "2130_8_1_1"   0  6  6  6  0  0  0  0  0  0  0  0
    "2130_13_2_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2130_9_1_1"   0  6  6  6  0  0  0  0  6  5  6  6
    "2130_5_1_1"   6  6  6  6  6  6  6  6  6  6  6  6
    "2130_29_1_1" 17 17 16  5 17 17 17 17 17 17 17 17
    "2130_12_1_1" 17  4  4  4 17 17 17 17 17 16  4  4
    "2130_1_1_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2130_4_1_1"  17  4  4  4 17 17 17 17 17 17 17 16
    "2130_28_1_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2130_15_1_1"  0  6  6  6  0  0  0  0  0  0  0  6
    "2130_36_1_1"  0  5  5  5  0  0  0  0  0  0  5  5
    "2131_5_1_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2131_8_1_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2131_6_2_1"   6  6  6  6  6  6  6  6  6  6  6  6
    "2131_8_4_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2131_2_1_1"   6  6  6  6  6  6  6  6  6  6  6  6
    "2131_8_2_1"  17  6  6  6 17 17 17 17 17 16 11  6
    "2131_8_5_1"  17  4  4  4 16  4  4  4  4  4  4  4
    "2131_3_1_1"   0 17 17 16  0 17 17 17 17 17 17 17
    "2131_2_2_1"  17  4  4  4 17 17 17 17 17 16 11  4
    "2131_8_3_1"  17  6  6  6 17 17 17 16 11 11  6  6
    "2131_16_1_1" 17  5  5  5 16  5  5  5  5  5  5  5
    "2131_3_2_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2133_5_1_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2133_16_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2133_1_2_1"   0  0  0  0  0  0  0  0  0  0  0  0
    "2133_1_3_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2133_16_2_1" 17 11  4  4 17 17 17 17 17 17 17 16
    "2133_2_1_1"  17  5  5  5 17 17 17 17 17 16 11 11
    "2133_20_1_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2133_18_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2133_13_1_1" 17  5  5  5 17 16  5  5  5  5  5  5
    "2133_15_2_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2133_16_3_1"  6  6  6  6  6  6  6  6  6  6  6  6
    "2133_6_1_1"   0 17 17 16  0  0 17 17 17 17 17 17
    "2134_5_3_1"  17  6  6  6 16  6  6  6  6  6  6  6
    "2134_3_1_1"   7  7  7  7  7  7  7  7  7  7  7  7
    "2134_9_1_1"  17  4  4  4 17 17 17 17 16  4  4  4
    "2134_15_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2134_10_1_4"  5  5  5  5  5  5  5  5  5  5  5  5
    "2134_11_1_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2134_8_1_1"   4  4  4  4  4 16  4  4  4  4  4  4
    "2134_2_1_1"  16  5  5  5  5  5  5  5  5  5  5  5
    "2134_11_4_1"  0  4  4  4  0  0  0  4  4  4  4  4
    "2134_5_4_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2134_3_2_1"  17  5  5  5 17 17 17 17 17 16  5  5
    "2134_10_2_3" 17  4  4  4 17 17 17 17 16  4  4  4
    "2135_19_5_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2135_17_5_1" 17  3  3  3 17 17 17 16  3  3  3  3
    "2135_11_5_2"  0  4  4  4  0  0  0  0  0  4  4  4
    "2135_19_5_2"  5  5  5  5  5  5  5  5  5  5  5  5
    "2135_5_1_1"  16  4  4  4  4  4  4  4  4  4  4  4
    "2135_2_2_2"   4  4  4  4  4  4  4  4  4  4  4  4
    "2135_11_5_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2135_16_4_1" 17  4  4  4 16  4  4  4  4  4  4  4
    "2135_16_4_2" 17  7  7  7 17 17 16  7  7  7  7  7
    "2135_10_3_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2135_3_3_1"   5 17 17 17  5  5 17 17 17 17 17 17
    "2135_15_4_3"  6  6  6  6  6  6  6  6  6  6  6  6
    "2136_4_2_1"   3  3  3  3  3  3  3  3  3  3  3  3
    "2136_5_1_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2136_2_1_1"  17  4  4  4 17 17 17 17 16  4  4  4
    "2136_18_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2136_1_1_1"  17  6  6  6 17 17 16  6  6  6  6  6
    "2136_4_1_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2136_21_2_1"  4  4  4  6  4  4  4  4  4  4  4  4
    "2136_11_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2136_3_1_1"   4  4  4  4  4  4  4  4  4  4  4  4
    "2136_19_1_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2136_12_1_1" 17  5  5  5 17 17 17 17 17 17 17 16
    "2136_16_1_1"  5  5  5  5  5  5  5  5  5  5  5  5
    "2137_10_1_1"  4  4  4  4  4  4  4  4  4  4  4  4
    "2137_13_1_1" 17 16  0  5 17 17 17 17 17 17 17 17
    "2137_6_2_1"   5  5  5  5  5  5  5  5  5  5  5  5
    "2137_7_1_1"   0  5  5  5  0  0  0  0  0  5  5  5
    end
    label values q407_mth1 q407
    label values q407_mth10 q407
    label values q407_mth11 q407
    label values q407_mth12 q407
    label values q407_mth2 q407
    label values q407_mth3 q407
    label values q407_mth4 q407
    label values q407_mth5 q407
    label values q407_mth6 q407
    label values q407_mth7 q407
    label values q407_mth8 q407
    label values q407_mth9 q407
    label def q407 0 "No Method", modify
    label def q407 1 "Female Sterilization", modify
    label def q407 2 "Male Sterilization", modify
    label def q407 3 "IUD", modify
    label def q407 4 "Injectables", modify
    label def q407 5 "Implants", modify
    label def q407 6 "Pill", modify
    label def q407 7 "Condom", modify
    label def q407 8 "Female Condom", modify
    label def q407 9 "Emergency contraception", modify
    label def q407 10 "Standard days method", modify
    label def q407 11 "Lactational Amenorrhea Method", modify
    label def q407 12 "Rhythm method", modify
    label def q407 13 "Withdrawal", modify
    label def q407 14 "Other modern Method", modify
    label def q407 15 "Other traditional Method", modify
    label def q407 16 "Birth", modify
    label def q407 17 "Pregnancy", modify
    label def q407 18 "Termination", modify
    Code:
    Code:
    * Check for duplicates in original wide-format data (pre-reshape)
    duplicates report uniquegirlid  // Should be 1 copy per girl
    duplicates report uniquegirlid q407_mth1-q407_mth12  // Verify no duplicates
    
    * Step 1: Reshape to long format
    encode uniquegirlid, gen(numeric_id)  // Convert string ID to numeric
    order numeric_id uniquegirlid  
    reshape long q407_mth, i(numeric_id) j(mth_num)  // Reshape to long format
    rename q407_mth method_code  
    
    * Remove duplicates introduced by reshape (if any)
    duplicates drop numeric_id mth_num, force  // Critical: Remove duplicates
    duplicates report numeric_id mth_num  // Confirm no duplicates
    assert r(unique_value) == _N  // Assert no duplicates
    
    * Step 2: Define program methods and pregnancy events
    gen program_method = inrange(method_code, 1, 14)  // Codes 1-14 = modern methods
    gen pregnancy_event = inlist(method_code, 16, 17, 18)  // Births/pregnancies
    
    * Step 3: Identify continuous use spells
    sort numeric_id mth_num  // Critical for tsspell
    tsset numeric_id mth_num  // Declare panel structure
    tsspell program_method, cond(program_method == 1)  // Generates _spell, _seq
    
    * Step 4: Calculate spell duration
    bysort numeric_id _spell: gen timespan = _N  // Duration = months in spell
    
    * Step 5: Expand spells into monthly intervals
    expand timespan if program_method == 1  // Create 1 row per month-at-risk
    
    * Step 6: Generate spell-specific interval (CRITICAL FIX)
    sort numeric_id _spell mth_num  // Ensure correct order
    bysort numeric_id _spell: gen spell_interval = _n  // Spell-specific counter (1, 2, ..., timespan)
    drop mth_num  // Avoid calendar-month duplicates; use spell_interval instead
    
    * Validate interval
    bysort numeric_id _spell: assert spell_interval == _n  // Must have 0 contradictions
    assert !missing(spell_interval)
    
    * Step 7: Flag discontinuations (failures)
    gen failure = 0  // Initialize failure variable
    
    // Discontinuation occurs if:
    // 1. The current interval is the last month of the spell, AND
    // 2. The next month shows discontinuation (stopped/switched/pregnancy)
    replace failure = 1 if (spell_interval == timespan) & ( ///
        method_code[_n+1] == 0 |        /// Stopped all methods
        method_code[_n+1] == 15 |       /// Switched to traditional
        pregnancy_event[_n+1] == 1      /// Pregnancy/birth
    )
    
    * Validate failure variable
    assert inlist(failure, 0, 1)  // Must be 0 or 1
    
    * Step 8: Aggregate and calculate continuation rates (CRx)
    preserve
    sort spell_interval  // Ensure proper grouping
    collapse (sum) Tx=failure (count) Nx=spell_interval, by(spell_interval)  // Group by spell_interval
    gen qx = Tx / Nx  // Discontinuation probability
    gen survival = 1 - qx  // Continuation probability
    gen CRx = exp(sum(ln(survival)))  // Cumulative continuation rate
    format CRx %4.3f
    list spell_interval CRx if spell_interval <= 12, clean noobs  // Final results
    restore

  • #2
    You can't have spell_interval as both the -by()- variable and as something being aggregated in the -collapse- command. Do this instead:
    Code:
    gen byte one = 1 if !missing(spell_interval)
    collapse (sum) Tx=failure (count) Nx=one, by(spell_interval)  // Group by spell_interval

    Comment


    • #3
      Thanks for the example and thanks for Clyde Schechter (as always) for pointing out the issue. The error message in Stata 17 and later is not very helpful, Stata 16 will error out with variable spell_interval not found, which is not great but more informative. We will look into producing a better error message in a future update.

      Comment


      • #4
        Originally posted by Clyde Schechter View Post
        Thank you very much.

        Comment

        Working...
        X