/* Attempted verification of
	Crescenzi (2007)
	"Reputation and Interstate Conflict"
	based on Crescenzi2007.dta, renamed from
	ajps_rr_2.dta from:
	http://www.unc.edu/~crescenz/data/ajps07.zip

   WARNING: This data file has over a half million observations. Running the
   Do-file can take some time, especially for some commands.

*/

clear all
set more off
version 9.1 //I chose this version because it's what was available in 2005-6.

use "Crescenzi2007.dta"

describe, short

/* Given that the dataset is sorted by 'dyadid year', I suspect that this is
   also the unit of analysis. The 'isid' command and lack of output
   supports this assertion.
*/

*isid dyadid year

/* Because Crescenzi ran a Cox survival analysis, I suspect that the dataset
   may be 'survival-time set'. The 'stset' command and output supports this
   assertion. See also 'help st'.
   NOTE: Because of the type of estimation, we will use the 'stcox' command to
   run models. This command does NOT specify the dependent variable because
   the DV is already contained within the stset information.
*/

stset

/*
   stset output:
-> stset year, id(dyadid) failure(cwmid) exit(time .) origin(enter==1)

                id:  dyadid
     failure event:  cwmid != 0 & cwmid < .
obs. time interval:  (year[_n-1], year]
 exit on or before:  time .
    t for analysis:  (time-origin)
            origin:  enter==1

------------------------------------------------------------------------------
     655109  total observations
      19850  observations end on or before enter()
------------------------------------------------------------------------------
     635259  observations remaining, representing
      19620  subjects
       2386  failures in multiple-failure-per-subject data
     660830  total analysis time at risk and under observation
                                                at risk from t =         0
                                     earliest observed entry t =         0
                                          last observed exit t =       183
*/

codebook, compact
/* Because it took time to run the codebook command, I copy-and-pasted the
   codebook information into a plain text document for reference.
*/

/* Model 2 of Table 1

   I chose to verify this model first because it has the fewest components.
   Through guess-work and trial and error, I determined that the variable
   RISc_{AB} in the paper was the variable RISc_min in the dataset.
*/
eststo M2: stcox RISc_min, nohr cluster(dyadid)

/* Models 1 and 3 use the minimum Polity score in the dyad, but I couldn't find
   an obvious variable to match. After some digging in the original ZIP-file,
   the variable 'smldmat' is the one Crescenzi used. I did not figure out
   how to construct 'smldmat' from the monadic components of
   'democ*', 'autoc*', and/or 'polity*'.

   However, I had generated 'mindem' from polity1 and polity2, correcting
   for Polity's missing-value codes of -88, -77, and -66.
   I initially did not add the if-statement on the 'gen mindem' command, which
   gave a value even if either polity1 or polity2 had a missing value.
   However, I also found that polity1 and polity2 had a different number of
   observations than if I generated polity scores from their base components
   (democ - autoc). So, I made polity*_new.
*/
mvdecode democ* autoc* polity*, mv(-88 -77 -66)
gen polity1_new = democ1 - autoc1
gen polity2_new = democ2 - autoc2
//CHOOSE ONE 'gen mindem' COMMAND TO RUN; COMMENT OUT THE OTHER:
//I've kept the one that keeps the most cases (*_new).
*gen mindem = min(polity1, polity2) if polity1!=. & polity2!=.
gen mindem = min(polity1_new, polity2_new) if polity1_new!=. & polity2_new!=.

/* Model 1 of Table 1 */
#delimit ;
	eststo M1: stcox
		I contigdum lcaprat minmin smldmat S
		,
		nohr cluster(dyadid)
	;
	eststo M1_corrected: stcox
		I contigdum lcaprat minmin mindem S
		,
		nohr cluster(dyadid)
	;

#delimit cr

/* Model 3 of Table 1 */
#delimit ;
	eststo M3: stcox
		RISc_min I RI contigdum lcaprat minmin smldmat S
		,
		nohr cluster(dyadid)
	;
	eststo M3_corrected: stcox
		RISc_min I RI contigdum lcaprat minmin mindem S
		,
		nohr cluster(dyadid)
	;
#delimit cr

/* This table verifies Table 1 in the article.
   Note that I used three decimal places for the coefficients to match
   Crescenzi's Regime Score formatting.
   Note also that I changed the stars to fit one-tail tests.
*/
#delimit ;
	esttab M1 M2 M3,
		nodepvars
		b(%9.3f) se(%9.3f)
		star(* 0.10 ** 0.02 *** 0.002)
		order(RISc_min I RI contigdum lcaprat minmin smldmat S)
		noobs
		scalars(
			risk
			N_fail
			ll
			chi2
			r2_p
			N_clust
		)
		sfmt(
			%9.0f
			%9.0f
			%9.0f
			%9.1f
			%9.2f
			%9.0f
		)
	;
#delimit cr

/* This table uses the correct minimum polity scores (changing the missing
   value codes from numbers to actual missing values).
*/
#delimit ;
	esttab M1_corrected M2 M3_corrected,
		nodepvars
		b(%9.3f) se(%9.3f)
		star(* 0.10 ** 0.02 *** 0.002)
		order(RISc_min I RI contigdum lcaprat minmin mindem S)
		noobs
		scalars(
			risk
			N_fail
			ll
			chi2
			r2_p
			N_clust
		)
		sfmt(
			%9.0f
			%9.0f
			%9.0f
			%9.1f
			%9.2f
			%9.0f
		)
	;
#delimit cr