Weeks 6-7 Practice Code

## Covariance and Correlation Analysis
data(iris); str(iris)
y <- iris$Petal.Length
x <- iris$Petal.Width
plot(x, y)                  # scatter plot: petal width vs. petal length
cov(x, y)                   # sample covariance
cor(x, y); cor.test(x, y)   # Pearson correlation and its significance test
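# A minimal sanity check against the defining formulas: the sample covariance
# is the mean-centered cross-product sum over n - 1, and the correlation is
# the covariance scaled by both standard deviations.
n <- length(x)
sum((x - mean(x)) * (y - mean(y))) / (n - 1)   # matches cov(x, y)
cov(x, y) / (sd(x) * sd(y))                    # matches cor(x, y)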

# Regression analysis
m1 <- lm(Petal.Length ~ Petal.Width, data = iris)

hist(resid(m1))     # histogram of residuals
qqnorm(resid(m1))   # normal Q-Q plot of residuals
qqline(resid(m1))
# Null hypothesis: the data follow a normal distribution.
# Alternative hypothesis: the data do not follow a normal distribution
# (at least one observation does not follow a normal distribution).
shapiro.test(resid(m1))

summary(m1)
predict(m1, data.frame(Petal.Width=0.2))                # fitted value at Petal.Width = 0.2
iris[iris$Petal.Width == 0.2, c('Petal.Length')]        # observed values at that width
mean(iris[iris$Petal.Width == 0.2, c('Petal.Length')])  # their mean, for comparison
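# A short extension sketch: predict() for lm objects can also return
# confidence and prediction intervals around the fitted value.
predict(m1, data.frame(Petal.Width=0.2), interval = "confidence")
predict(m1, data.frame(Petal.Width=0.2), interval = "prediction")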

# Classification

data(Sonar, package = "mlbench")
summary(Sonar); prop.table(table(Sonar$Class))   # class balance (M vs. R)

set.seed(1)
ind <- sample(2, nrow(Sonar), replace = TRUE, prob = c(0.8, 0.2))

traindata <- Sonar[ind == 1, ]
testdata <- Sonar[ind == 2, ]

train.label <- traindata[, 61]; test.label <- testdata[, 61]
train_data <- traindata[, -61]; test_data <- testdata[, -61]

# Naive Bayes classification
library(e1071)
naivemodel <- naiveBayes(Class ~ ., data = traindata)
pred <- predict(naivemodel, test_data)
(x <- table(pred, test.label))   # confusion matrix
sum(diag(x)) / sum(x)            # accuracy
naivemodel$apriori               # prior (class counts in the training set)
naivemodel$tables                # likelihoods (per-class mean and sd)
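# A minimal sketch of what predict() does internally for one test row,
# assuming all 60 Sonar predictors are numeric: the posterior is proportional
# to prior x product of Gaussian likelihoods, computed on the log scale.
# Compare with predict(naivemodel, test_data[1, ], type = "raw").
row1 <- test_data[1, ]
prior <- naivemodel$apriori / sum(naivemodel$apriori)
loglik <- sapply(names(prior), function(cl) {
  sum(sapply(names(row1), function(v) {
    m <- naivemodel$tables[[v]][cl, 1]   # class-conditional mean
    s <- naivemodel$tables[[v]][cl, 2]   # class-conditional sd
    dnorm(row1[[v]], m, s, log = TRUE)
  }))
})
post <- exp(loglik + log(prior))
post / sum(post)                         # normalized posterior probabilities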

# Random Forest
library(randomForest)
rf <- randomForest(Class ~ ., data = traindata, importance = TRUE)
pred.rf <- predict(rf, newdata = testdata)
(f <- table(pred.rf, testdata$Class))   # confusion matrix
sum(diag(f)) / sum(f)                   # accuracy
rf         # OOB error estimate and confusion matrix
plot(rf)   # error rates vs. number of trees
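# Since the forest was fit with importance = TRUE, per-variable importance
# can also be inspected (a short optional sketch).
head(importance(rf))   # MeanDecreaseAccuracy / MeanDecreaseGini per predictor
varImpPlot(rf)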

# k-means
data("iris");str(iris)
library(ggplot2)
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()

set.seed(20)
kmeanCluster <- kmeans(iris[, 3:4], 3, nstart = 20)

kmeanCluster$cluster <- as.factor(kmeanCluster$cluster)
ggplot(iris, aes(Petal.Length, Petal.Width, color = kmeanCluster$cluster)) + geom_point()

table(iris$Species, kmeanCluster$cluster)
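# A minimal sketch of the elbow method for choosing k: plot the total
# within-cluster sum of squares for k = 1..10 and look for the bend.
set.seed(20)
wss <- sapply(1:10, function(k) kmeans(iris[, 3:4], k, nstart = 20)$tot.withinss)
plot(1:10, wss, type = "b", xlab = "k", ylab = "total within-cluster SS")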


# hierarchical clustering
clusters <- hclust(dist(iris[, 3:4]), method = 'average')   # average linkage
plot(clusters)                                              # dendrogram
clusterCut <- cutree(clusters, 3)                           # cut into 3 clusters
cluster.cut <- as.factor(clusterCut)
ggplot(iris, aes(Petal.Length, Petal.Width, color = cluster.cut)) + geom_point()

table(iris$Species, clusterCut)   # clusters vs. true species
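# Optional sketch: redraw the dendrogram and box the three clusters at the
# cut height using rect.hclust() from base stats.
plot(clusters)
rect.hclust(clusters, k = 3, border = 2:4)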

