Beispieldaten zum Kausalitätsbuch

Aus Metheval - Tipps+Tools - Wiki
Wechseln zu: Navigation, Suche

Tabelle 3.1

#### Tabelle

Table31 <- function(PrintTable=F)
{
  NumberOfUnits = 6
  Unit <- paste(rep("u",NumberOfUnits),1:NumberOfUnits,sep="")
  SamplingProbability <- rep(1/NumberOfUnits,NumberOfUnits)
  CovariateGender <- c(0,0,0,0,1,1)
  TrueOutcomeUnderControl <- c(68,81,89,92,112,119)
  TrueOutcomeUnderTreatment1 <- c(82,89,101,108,118,123)
  TrueOutcomeUnderTreatment2 <- c(75,78,92,85,120,111)
  
  ControlProbability <- c(1/14,2/14,3/14,4/14,5/14,6/14)
  TreatmentProbability1 <- c(12/14,10/14,8/14,6/14,4/14,2/14)
  TreatmentProbability2 <- (1-TreatmentProbability1-ControlProbability)
  
  ConditionalTreatment1ProbabilityForU  <- TreatmentProbability1 * SamplingProbability / mean(TreatmentProbability1)
  ConditionalTreatment2ProbabilityForU  <- TreatmentProbability2 * SamplingProbability / mean(TreatmentProbability2)
  ConditionalControlProbabilityForU    <-  ControlProbability * SamplingProbability / mean(ControlProbability)

  if (PrintTable)
  {
    cat("Unit \t Sampling    \t Covariate \t True Outcome    \t True Outcome      \t True Outcome      \t Probability \t Probability      \t Probability     \n")
    cat("U    \t Probability \t Gender    \t under Control   \t under Treatment 1 \t under Treatment 2 \t for Control \t for Treatment 1  \t for Treatment 2 \n")
    cat(rep("-",85),"\n")
    for (i in (1:NumberOfUnits))
    {
       cat(Unit[i],"\t",SamplingProbability[i],"\t",CovariateGender[i],"\t\t")
       cat(" ", TrueOutcomeUnderControl[i],"\t\t\t ", TrueOutcomeUnderTreatment1[i],"\t\t\t",TrueOutcomeUnderTreatment2[i],"\t\t\t")
       cat(" ", ControlProbability[i],"\t ", TreatmentProbability1[i],"\t\t",TreatmentProbability2[i],"\t\n")
    }
    cat(rep("-",85),"\n")

  }

  return(list(Unit=Unit,SamplingProbability=SamplingProbability,CovariateGender=CovariateGender,
              TrueOutcomeUnderControl=TrueOutcomeUnderControl, TrueOutcomeUnderTreatment1=TrueOutcomeUnderTreatment1,TrueOutcomeUnderTreatment2=TrueOutcomeUnderTreatment2,
              ControlProbability=ControlProbability,TreatmentProbability1=TreatmentProbability1,TreatmentProbability2=TreatmentProbability2,
              NumberOfUnits=NumberOfUnits,
              ConditionalControlProbabilityForU=ConditionalControlProbabilityForU,
              ConditionalTreatment1ProbabilityForU=ConditionalTreatment1ProbabilityForU,
              ConditionalTreatment2ProbabilityForU=ConditionalTreatment2ProbabilityForU))

}

### Datenerzeugung

t<-Table31(PrintTable=T)

DatensatzA <- NULL
DatensatzB <- NULL
SampleSize <- 500

for (i in (1:SampleSize))
{

  Unit <- DrawPerson(t$NumberOfUnits-1)
  Gender <- t$CovariateGender[Unit]

  TrueOutcome1 <- DrawOutcome(mean(t$TrueOutcomeUnderTreatment1[t$CovariateGender==Gender]),sqrt(var(t$TrueOutcomeUnderTreatment1[t$CovariateGender==Gender])))
  TrueOutcome2 <- DrawOutcome(mean(t$TrueOutcomeUnderTreatment2[t$CovariateGender==Gender]),sqrt(var(t$TrueOutcomeUnderTreatment2[t$CovariateGender==Gender])))
  TrueControl <- DrawOutcome(mean(t$TrueOutcomeUnderControl[t$CovariateGender==Gender]),sqrt(var(t$TrueOutcomeUnderControl[t$CovariateGender==Gender])))

  TreatmentProbability1 <- t$TreatmentProbability1[Unit]
  TreatmentProbability2 <- t$TreatmentProbability2[Unit]
  
  Treatment <- SelectTreatment(TreatmentProbability1,TreatmentProbability2)

  if (Treatment==0) {
    Outcome <- TrueControl
  } else if (Treatment==1) {
    Outcome <- TrueOutcome1
  } else if (Treatment==2) {
    Outcome <- TrueOutcome2
  }

  Y1  <- 0 + 1 * Outcome + rnorm(1,0,0.25)
  Y2  <- 0 + 1 * Outcome + rnorm(1,0,0.25)
  Y3  <- 0 + 1 * Outcome + rnorm(1,0,0.25)

  DatensatzA <- rbind(DatensatzA,c(Unit=Unit,Gender=Gender,Treatment=Treatment,Outcome=Outcome,TreatmentProbability1=TreatmentProbability1,TreatmentProbability2=TreatmentProbability2,Y1=Y1,Y2=Y2,Y3=Y3))

  ConditionalTreatment1ProbabilityForU  <- t$TreatmentProbability1[Unit] * t$SamplingProbability[Unit] / mean(t$TreatmentProbability1)
  ConditionalTreatment2ProbabilityForU  <- t$TreatmentProbability2[Unit] * t$SamplingProbability[Unit] / mean(t$TreatmentProbability2)
  ConditionalControlProbabilityForU    <-  t$ControlProbability[Unit] * t$SamplingProbability[Unit] / mean(t$ControlProbability)

  TrueOutcome1Weighted <- TrueOutcome1 * ConditionalTreatment1ProbabilityForU
  TrueOutcome2Weighted <- TrueOutcome2 * ConditionalTreatment2ProbabilityForU
  TrueControlWeighted <- TrueControl * ConditionalControlProbabilityForU

  DatensatzB <- rbind(DatensatzB,c(Unit=Unit,Gender=Gender,Treatment=Treatment,Outcome=Outcome,TreatmentProbability1=TreatmentProbability1,TreatmentProbability2=TreatmentProbability2,Y1=Y1,Y2=Y2,Y3=Y3,
                      TrueOutcome1=TrueOutcome1, TrueOutcome2=TrueOutcome2, TrueControl=TrueControl, TrueOutcome1Weighted=TrueOutcome1Weighted, TrueOutcome2Weighted=TrueOutcome2Weighted, TrueControlWeighted=TrueControlWeighted))

}

write.table(x=DatensatzA,file='32_DatensatzA.dat',append=FALSE,col.names=TRUE,row.names=FALSE)
write.table(x=DatensatzB,file='32_DatensatzB.dat',append=FALSE,col.names=TRUE,row.names=FALSE)


Tabelle 4.1

#### Tabelle

Table41 <- function(PrintTable=F)
{
  NumberOfUnits = 6
  Unit <- paste(rep("u",NumberOfUnits),1:NumberOfUnits,sep="")
  SamplingProbability <- rep(1/NumberOfUnits,NumberOfUnits)
  CovariateGender <- c(0,0,0,0,1,1)
  TrueOutcomeUnderControl <- c(68,78,88,98,106,116)
  TrueOutcomeUnderTreatment <- c(81,86,100,103,114,130)
  TreatmentProbability <- c(6/7,5/7,4/7,3/7,2/7,1/7)
  ControlProbability <-  (1-TreatmentProbability)

  ConditionalTreatmentProbabilityForU  <- TreatmentProbability * SamplingProbability / mean(TreatmentProbability)
  ConditionalControlProbabilityForU    <- ControlProbability * SamplingProbability / mean(ControlProbability)

  if (PrintTable)
  {
    cat("Unit \t Sampling    \t Covariate \t True Outcome    \t True Outcome     \t Probability   \t| Individual   \t P(U=u|X=0) \t P(U=u|X=1) \n")
    cat("U    \t Probability \t Gender    \t under Control   \t under Treatment  \t for Treatment \t| causal effect\t            \t            \n")
    cat(rep("-",75),"\n")
    for (i in (1:NumberOfUnits))
    {
       cat(Unit[i],"\t",SamplingProbability[i],"\t",CovariateGender[i],"\t\t")
       cat(" ", TrueOutcomeUnderControl[i],"\t\t\t ", TrueOutcomeUnderTreatment[i],"\t\t\t",TreatmentProbability[i],"\t")
       cat("|", TrueOutcomeUnderTreatment[i]-TrueOutcomeUnderControl[i],"\t\t",ConditionalTreatmentProbabilityForU[i],"\t", ConditionalControlProbabilityForU[i],"\n")
    }
    cat(rep("-",75),"\n")
    cat("E(Y|X=0) = ",mean(TrueOutcomeUnderControl),"\n")
    cat("E(Y|X=1) = ",mean(TrueOutcomeUnderTreatment),"\n")
    cat("E(P(X=1)) = ",mean(TreatmentProbability),"\n\n")

    cat("E(Y|X=0,Z=0) = ",mean(TrueOutcomeUnderControl[CovariateGender==0]),"\n")
    cat("E(Y|X=0,Z=1) = ",mean(TrueOutcomeUnderControl[CovariateGender==1]),"\n")
    cat("E(Y|X=1,Z=0) = ",mean(TrueOutcomeUnderTreatment[CovariateGender==0]),"\n")
    cat("E(Y|X=1,Z=1) = ",mean(TrueOutcomeUnderTreatment[CovariateGender==1]),"\n\n")

    cat("Average causal effect (ACE):", mean(TrueOutcomeUnderTreatment)-mean(TrueOutcomeUnderControl),"\n")
    cat("Prima facie effect (PFE):", sum(ConditionalTreatmentProbabilityForU*TrueOutcomeUnderTreatment)-sum(ConditionalControlProbabilityForU*TrueOutcomeUnderControl),"\n")
  }
  
  return(list(Unit=Unit,SamplingProbability=SamplingProbability,CovariateGender=CovariateGender,
              TrueOutcomeUnderControl=TrueOutcomeUnderControl, TrueOutcomeUnderTreatment=TrueOutcomeUnderTreatment,
              TreatmentProbability=TreatmentProbability,NumberOfUnits=NumberOfUnits,
              ConditionalControlProbabilityForU=ConditionalControlProbabilityForU,
              ConditionalTreatmentProbabilityForU=ConditionalTreatmentProbabilityForU))

}

### Datenerzeugung

t<-Table41(PrintTable=T)

DatensatzA <- NULL
DatensatzB <- NULL
SampleSize <-500

for (i in (1:SampleSize))
{

  Unit <- DrawPerson(t$NumberOfUnits-1)
  Gender <- t$CovariateGender[Unit]

  TrueOutcome <- DrawOutcome(mean(t$TrueOutcomeUnderTreatment[t$CovariateGender==Gender]),sqrt(var(t$TrueOutcomeUnderTreatment[t$CovariateGender==Gender])))
  TrueControl <- DrawOutcome(mean(t$TrueOutcomeUnderControl[t$CovariateGender==Gender]),sqrt(var(t$TrueOutcomeUnderControl[t$CovariateGender==Gender])))

  TreatmentProbability <- t$TreatmentProbability[Unit]
  Treatment <- SelectTreatment(TreatmentProbability)

  if (Treatment==1) {
    Outcome <- TrueOutcome
  } else if (Treatment==0) {
    Outcome <- TrueControl
  }

  Y1  <- 0 + 1 * Outcome + rnorm(1,0,0.25)
  Y2  <- 0 + 1 * Outcome + rnorm(1,0,0.25)
  Y3  <- 0 + 1 * Outcome + rnorm(1,0,0.25)

  DatensatzA <- rbind(DatensatzA,c(Unit=Unit,Gender=Gender,Treatment=Treatment,Outcome=Outcome,TreatmentProbability=TreatmentProbability,Y1=Y1,Y2=Y2,Y3=Y3))

  ConditionalTreatmentProbabilityForU  <- t$TreatmentProbability[Unit] * t$SamplingProbability[Unit] / mean(t$TreatmentProbability)
  ConditionalControlProbabilityForU    <- (1-t$TreatmentProbability[Unit]) * t$SamplingProbability[Unit] / mean(t$TreatmentProbability)
  TrueOutcomeWeighted <- TrueOutcome * ConditionalTreatmentProbabilityForU
  TrueControlWeighted <- TrueControl * ConditionalControlProbabilityForU
  
  DatensatzB <- rbind(DatensatzB,c(Unit=Unit,Gender=Gender,Treatment=Treatment,Outcome=Outcome,TreatmentProbability=TreatmentProbability,Y1=Y1,Y2=Y2,Y3=Y3,TrueOutcome=TrueOutcome,TrueControl=TrueControl,TrueOutcomeWeighted=TrueOutcomeWeighted, TrueControlWeighted=TrueControlWeighted))

}

write.table(x=DatensatzA,file='42_DatensatzA.dat',append=FALSE,col.names=TRUE,row.names=FALSE)
write.table(x=DatensatzB,file='42_DatensatzB.dat',append=FALSE,col.names=TRUE,row.names=FALSE)


Funktionen

#### Funktionen

# Ziehe eine Person

DrawPerson <- function(n=6)
{
  return(round(runif(1)*n+1,0))
}

# Wähle ein Treatment aus

SelectTreatment <- function(TreatmentProbability1=0.5,TreatmentProbability2=0,n=1)
{
  tmp <- runif(n)
  t <- + 2*(tmp>TreatmentProbability1 & tmp<=TreatmentProbability1+TreatmentProbability2) + 1* (tmp<=TreatmentProbability1)
  return(t)
}

# Ziehe ein Outcome

DrawOutcome <- function(Mean=0, SD=1)
{
  return(rnorm(1, Mean, SD) + rnorm(1,0,0.15))
}