提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
# Load the raw click-event log and keep only the rows belonging to ids that
# clicked button 11034 at least once.

# NOTE(review): the original line started with the incomplete call
# `read.table(sep="###")`. It has no `file` argument and therefore cannot run;
# it is preserved here as a comment only.
# read.table(sep="###")

test <- read.csv("./data/chapter07/test1.csv", header = FALSE,
                 fileEncoding = "UTF-8")

# Import the click-event data (no header row, so columns are V1, V2, V3, ...).
data_click <- read.csv("./data/chapter07/data_click.csv", header = FALSE)
head(data_click)  # first six rows
nrow(data_click)

# Distinct V1 values of the rows whose V3 equals 11034.
# NOTE(review): V1 is presumably the user id and V3 the button id - confirm
# against the data dictionary.
targetuserid <- unique(data_click[data_click$V3 == "11034", "V1"])
str(targetuserid)

# Every click row whose V1 is one of those ids.
data_click_new <- data_click[data_click$V1 %in% targetuserid, ]
str(data_click_new)
# Convert the filtered click log into an arules `transactions` object carrying
# the sequence/event ids that cspade() needs.

# install.packages("arulesSequences")
library(arulesSequences)

# One-column data frame of the clicked ids (V3); the column is turned into a
# factor before the coercion to transactions.
tmp_data <- data.frame(click = data_click_new$V3)
str(tmp_data)
tmp_data$click <- as.factor(tmp_data$click)
str(tmp_data)

data_click_tran <- as(tmp_data, "transactions")
str(data_click_tran)

# Attach V1 as the sequence id and V2 as the event id of each transaction.
transactionInfo(data_click_tran)$sequenceID <- data_click_new$V1
transactionInfo(data_click_tran)$eventID <- data_click_new$V2
str(data_click_tran)
data_click_tran
summary(data_click_tran)

# Inspect both ends of the transaction set.
head(as(data_click_tran, "data.frame"))
tail(as(data_click_tran, "data.frame"))

# Frequency plot of the 20 most frequent items.
itemFrequencyPlot(data_click_tran, topN = 20)
# Mine frequent click sequences with cSPADE, keep those that end in a click on
# 11034, and retain the ones that make up the first 75% of cumulative support.

myrules <- cspade(data_click_tran,
                  parameter = list(support = 0, maxlen = 2),
                  control = list(verbose = TRUE))
myrules <- sort(myrules, by = "support")  # order by support
str(myrules)
head(as(myrules, "data.frame"))

# Regular expression for sequences whose last element contains click=11034:
# anything, then "click=11034", then non-"}" characters up to the closing "}>".
# The wildcards and the doubled backslashes had been stripped from the pasted
# text ("\}" is not a valid escape in an R string), so they are restored here.
targetclick <- paste0(".*click=11034", "[^\\}]*\\}>")
finalrules <- myrules[grep(targetclick, as(myrules, "data.frame")$sequence)]
str(as(myrules, "data.frame"))
head(as(finalrules, "data.frame"))
nrow(finalrules)  # number of matching sequences

# Convert to a data frame, dropping the first (highest-support) sequence.
# NOTE(review): the author queried this "-1"; presumably the first entry is the
# single-element sequence <{click=11034}> itself - confirm on the data.
finalrules.data.frame <- as(finalrules[-1], "data.frame")
head(finalrules.data.frame)

# Share of each sequence in the total support, and the running total.
finalrules.data.frame$percentage <- finalrules.data.frame$support /
  sum(finalrules.data.frame$support)
finalrules.data.frame$sum.percentage <-
  cumsum(finalrules.data.frame$percentage)
head(finalrules.data.frame)

# Keep the sequences whose cumulative share does not exceed 75%.
finalrules.data.frame <-
  finalrules.data.frame[finalrules.data.frame$sum.percentage <= 0.75, ]
nrow(finalrules.data.frame)
head(finalrules.data.frame)

# Characters 9-13 of the sequence label are taken as the id of the first click.
# NOTE(review): this assumes labels of the form "<{click=NNNNN},..." with
# five-character ids - confirm.
clickid <- substr(finalrules.data.frame$sequence, 9, 13)
clickid
# conf = (number of times button i led the player on to the play button 11034)
#        / (number of clicks on button i)

# Total number of sequences seen by cspade(); constant, so read it once.
n <- myrules@info$nsequences

conf <- rep(1, length(clickid))
# seq_along() keeps the loop from running when clickid is empty.
for (i in seq_along(clickid)) {
  nclickid_support <- finalrules.data.frame[i, "support"]
  # support * n turns the relative support back into a count, which is then
  # divided by the number of rows in the full log whose V3 is this button id.
  conf[i] <- nclickid_support * n /
    nrow(data_click[data_click$V3 == clickid[i], ])
}

result <- data.frame(click = clickid,
                     percentage = round(finalrules.data.frame$percentage, 3),
                     conf = conf)
head(result)
# Reshape `result` to long format and draw a stacked vertical bar chart with
# percentage above the axis and conf below it.

# install.packages("reshape")
library(reshape)

md <- melt(result, id = "click")  # long format: click, variable, value
head(md)

# Negate the conf values so that their bars extend in the opposite direction;
# the y-axis formatter below prints absolute values.
md$value[md$variable == "conf"] <- -md$value[md$variable == "conf"]
head(md)

# Sort by `variable` in decreasing order.
md <- md[order(md$variable, decreasing = TRUE), ]
head(md)

# library(devtools)
# install_github("madlogos/recharts")
# install.packages("recharts")
library(recharts)
library(plyr)

echartr(md, click, value, variable, type = "vbar", subtype = "stack") %>%
  setTitle("引导用户进入开始打牌11034的重点事件id分析") %>%
  setXAxis(axisLine = list(onZero = TRUE)) %>%
  setYAxis(axisLabel = list(
    formatter = JS("function (value) {return Math.abs(value);}")
  ))
# k-nearest-neighbour classification of iris with class::knn().

iris1 <- iris
set.seed(1234)

library(caret)
# Stratified 50/50 split on Species.
ind <- createDataPartition(iris1$Species, times = 1, p = 0.5, list = FALSE)
traindata <- iris1[ind, ]   # training set
testdata <- iris1[-ind, ]   # test set

library(class)
# Use k = 3. The pasted code passed k=5 while its own comment said 3, and the
# manual check ceshi() further down defaults to k = 3, so the two are aligned.
a <- knn(traindata[, 1:4], testdata[, 1:4], traindata[, 5], k = 3)

# Predictions for the first and the last test row.
a[1]
a[nrow(testdata)]
# Manual k-nearest-neighbour prediction for a single test row, used to check
# the result of class::knn().
#
# Reads the global data frames `traindata` and `testdata` (columns 1-4 are the
# numeric features, column 5 is Species).
#
# Args:
#   n: row index into `testdata` of the sample to classify.
#   k: number of nearest training rows that vote.
#
# Returns:
#   The most frequent Species among the k nearest training rows (a factor of
#   length one).
ceshi <- function(n = 1, k = 3) {
  # Repeat test row n once per training row so the two frames line up; the
  # original hard-coded 75 rows here.
  target <- testdata[rep(n, nrow(traindata)), 1:4]

  # Euclidean distance from every training row to the test row.
  sq_diff <- (traindata[, 1:4] - target)^2
  dist1 <- sqrt(rowSums(sq_diff))

  # Species of the k closest training rows.
  nearest <- traindata[order(dist1)[seq_len(k)], 5]

  # Tally the votes, most frequent first, and return the winning level.
  votes <- data.frame(sort(table(nearest), decreasing = TRUE))
  votes[1, 1]
}

ceshi()        # prediction for the first test sample
ceshi(n = 75)  # prediction for the last test sample
# Naive Bayes classification of the car-evaluation data, with training and
# test error rates.

# Read the columns as factors. Since R 4.0 strings are read as character by
# default, which lets the class levels of `accept` differ between the actual
# and predicted sides of the confusion tables below.
car <- read.table("./data/chapter08/car.data", sep = ",",
                  stringsAsFactors = TRUE)
colnames(car) <- c("buy", "main", "doors", "capacity",
                   "lug_boot", "safety", "accept")
str(car)

library(caret)
# Stratified 75/25 split on the target column.
ind <- createDataPartition(car$accept, times = 1, p = 0.75, list = FALSE)
carTR <- car[ind, ]
carTE <- car[-ind, ]

library(e1071)
naiveBayes.model <- naiveBayes(accept ~ ., data = carTR)

# Predictions on both partitions.
carTR_predict <- predict(naiveBayes.model, newdata = carTR)  # training set
carTE_predict <- predict(naiveBayes.model, newdata = carTE)  # test set

# Confusion tables: rows are actual classes, columns are predicted classes.
tableTR <- table(actual = carTR$accept, predict = carTR_predict)
tableTE <- table(actual = carTE$accept, predict = carTE_predict)

# Misclassification rate as a percentage string: the off-diagonal share of the
# confusion table, rounded to two decimals.
misclass_pct <- function(tab) {
  paste0(round((sum(tab) - sum(diag(tab))) * 100 / sum(tab), 2), "%")
}
errTR <- misclass_pct(tableTR)
errTE <- misclass_pct(tableTE)
errTR
errTE