r - R 3个变量的多重/逻辑回归，统计检验？

Question

我需要一些有关统计检验代码的帮助。基本上，我正在尝试研究年龄、政党认同和对大麻合法化的看法之间的关系。数据集是 2010 年英国社会态度调查（British Social Attitudes Survey）。

# Load the 2010 British Social Attitudes extract. stringsAsFactors = TRUE is
# spelled out because R >= 4.0 reads text columns as character by default,
# whereas the structure printed below (and a binomial glm() on CanLegal)
# relies on those columns being factors.
bsa_2010 <- read.csv(
  "https://dl.dropboxusercontent.com/s/ubl9huokroj9jw8/bsa%202010.csv",
  stringsAsFactors = TRUE
)
> dput(head(bsa_2010))
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("England", 
"Scotland", "Wales"), class = "factor"), RSex = structure(c(1L, 
1L, 2L, 2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    RAge = c(75L, 34L, 81L, 25L, 33L, 76L), MarStat = structure(c(4L, 
    4L, 2L, 3L, 3L, 5L), .Label = c("Living as married", "Married", 
    "Not married", "Separated or divorced after marrying", "Widowed"
    ), class = "factor"), ChildHh = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), WhPaper = structure(c(8L, 
    8L, 8L, 11L, 12L, 8L), .Label = c("(Scottish) Daily Express", 
    "(Scottish) Daily Mail", "Daily Mirror/ Scottish Mirror", 
    "Daily Record", "Daily Star", "Daily Telegraph", "Financial Times", 
    "Skip,not read paper normally", "The Guardian", "The Independent", 
    "The Sun/ Scottish Sun", "The Times"), class = "factor"), 
    PartyIDN = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer (WRITE IN)", "Other party (WRITE IN)", 
    "Plaid Cymru", "Refused to say", "Scottish National Party", 
    "UK Independence Party (UKIP)/Veritas"), class = "factor"), 
    Partyid1 = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer", "Other party", "Plaid Cymru", "Refusal", 
    "Scottish National Party", "UK Independence Party (UKIP)/Veritas"
    ), class = "factor"), PartyId2 = structure(c(1L, 5L, 1L, 
    4L, 1L, 4L), .Label = c("Conservative", "Green Party", "Labour", 
    "Liberal Democrat", "None", "Other party", "Other/DK/Ref"
    ), class = "factor"), Spend1 = structure(c(3L, 4L, 4L, 3L, 
    3L, 4L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Social security benefits"
    ), class = "factor"), Spend2 = structure(c(6L, 3L, 2L, 4L, 
    9L, 10L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Skip,no 1st priority", 
    "Social security benefits"), class = "factor"), RClassGp = structure(c(4L, 
    6L, 1L, 2L, 4L, 6L), .Label = c("Employers in small org; own account workers", 
    "Intermediate occupations", "Lower supervisory & technical occupations", 
    "Managerial & professional occups", "Not classifiable", "Semi-routine & routine occupations", 
    "Skip, never had a job+DK+NA last job"), class = "factor"), 
    RNSSECG = structure(c(4L, 8L, 9L, 3L, 4L, 8L), .Label = c("1.1", 
    "1.2", "Intermediate occupations", "Lower managerial and professional occupations", 
    "Lower supervisory & technical occupations", "Not classified", 
    "Routine occupations", "Semi-routine Occupations", "Small employers and own account workers"
    ), class = "factor"), CanLegal = structure(c(1L, 1L, 1L, 
    2L, 2L, 1L), .Label = c("Taking cannabis should remain illegal", 
    "should be legal, only licenced shops"), class = "factor"), 
    RaceOri3 = structure(c(10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASIAN: of Bangladeshi origin", 
    "ASIAN: of Chinese origin", "ASIAN: of Indian origin", "ASIAN: of Pakistani origin", 
    "ASIAN: of other origin (WRITE IN)", "BLACK: of African origin", 
    "BLACK: of Caribbean origin", "MIXED ORIGIN (WRITE IN)", 
    "OTHER (WRITE IN)", "WHITE: of any origin"), class = "factor"), 
    Agecat1 = structure(c(6L, 2L, 7L, 1L, 2L, 6L), .Label = c("(18,28]", 
    "(28,38]", "(38,48]", "(48,58]", "(58,68]", "(68,78]", "(78,88]", 
    "(88,98]"), class = "factor"), Agecat2 = structure(c(3L, 
    1L, 4L, 1L, 1L, 3L), .Label = c("(18,38]", "(38,58]", "(58,78]", 
    "(78,98]"), class = "factor")), .Names = c("Country", "RSex", 
"RAge", "MarStat", "ChildHh", "WhPaper", "PartyIDN", "Partyid1", 
"PartyId2", "Spend1", "Spend2", "RClassGp", "RNSSECG", "CanLegal", 
"RaceOri3", "Agecat1", "Agecat2"), row.names = c(NA, 6L), class = "data.frame")

使用的变量是：RAge（年龄）、PartyIDN（政党认同）、CanLegal（对大麻合法化的看法）

为简化起见，我将年龄分类，只保留了两个最大的政党。

# Bin respondent age into 10-year bands: (18,28], (28,38], ..., (88,98].
bsa_2010[["Agecat1"]] <- cut(bsa_2010[["RAge"]], breaks = seq(18, 98, by = 10))

# Keep only respondents who identify with one of the two largest parties.
Parties <- bsa_2010[bsa_2010[["PartyIDN"]] %in% c("Conservative", "Labour"), ]

# Rebuild the factor so that only the two retained parties remain as levels.
Parties[["PartyIDN"]] <- factor(Parties[["PartyIDN"]])

我应该如何进行统计检验，以获得关于年龄、政党认同与对大麻合法化的看法之间关系的 p 值？

欢迎任何帮助，谢谢！

score 0 · Accepted Answer

看起来您还需要一些统计基础知识方面的帮助，建议您阅读 https://stats.idre.ucla.edu/r/dae/logit-regression/ 中的统计部分；R 部分则很简单（我将把您新建的年龄类别视为因子，而不是连续变量）……

# Bin age into 10-year bands. include.lowest = TRUE closes the first interval
# ([18,28]) so respondents aged exactly 18 are kept instead of becoming NA
# and being silently dropped from the model.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 28, 38, 48, 58, 68, 78, 88, 98),
  include.lowest = TRUE
)

# Keep only the two largest parties, then rebuild the factor so the other
# party labels no longer appear as (empty) levels.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))
Parties$PartyIDN <- factor(Parties$PartyIDN)
str(Parties)

# Drop any age bands that have no rows left after subsetting.
Parties$Agecat1 <- factor(Parties$Agecat1)

# A binomial glm() needs a factor (or 0/1) response, but read.csv() returns
# character columns by default since R 4.0. The levels are listed explicitly
# so that "should be legal" is the modelled outcome regardless of locale.
Parties$CanLegal <- factor(
  Parties$CanLegal,
  levels = c(
    "Taking cannabis should remain illegal",
    "should be legal, only licenced shops"
  )
)

# Logistic regression of legalisation opinion on age band and party.
firstattempt <- glm(CanLegal ~ Agecat1 + PartyIDN, data = Parties, family = "binomial")
summary(firstattempt)

顺便说一句，把年龄分组并不会让 R 命令或统计分析变得更容易。您完全可以直接这样做：

# Same logistic model, but with age entered as the numeric column RAge
# instead of the binned Agecat1 factor.
secondattempt <- glm(CanLegal ~ RAge + PartyIDN, data = Parties, family = "binomial")
summary(secondattempt)

这实际上使数据更容易解释，并且支持这样的假设：年龄对结果很重要，而政党并不重要。您可以通过下面这些非常简单的图表看到这一点：

# Mosaic plots of the two-way contingency tables built by xtabs():
# opinion by party first, then opinion by age band.
mosaicplot(xtabs(~CanLegal + PartyIDN, data = Parties))
mosaicplot(xtabs(~CanLegal + Agecat1, data = Parties))

r - R 3个变量的多重/逻辑回归，统计检验？

1 回答 1

Related

Reference