getwd()
## [1] "C:/(path omitted)/NICER1_3_2/2020-11-24NICER1_3_2/NICER_NNS"
## [1] "C:/(path omitted)/NICER1_3_2/2020-11-24NICER1_3_2"
myTTR <- function(){ # user-defined function: per-file type/token summary
  # For every file in the current working directory, count word tokens and
  # word types in the participant utterance lines ("*JPN..." / "*NS...")
  # and return one tab-separated string per file:
  #   "<file>\t<types>\t<tokens>\t<TTR>"
  # Returns NULL when the directory is empty (same as the original, which
  # returned an untouched NULL accumulator).
  file.zenbu <- list.files()
  if (length(file.zenbu) == 0) {
    return(NULL)
  }
  # vapply instead of growing a vector with c() inside a for loop.
  vapply(file.zenbu, function(i) {
    yomikomi <- readLines(i, warn = FALSE)
    # Keep only utterance lines, then strip the "*JPN501:<TAB>" prefix.
    tmp1 <- grep("\\*(JPN|NS)", yomikomi, value = TRUE)
    tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1)
    # Normalise: remove punctuation, lower-case, split on single spaces.
    kotoba <- unlist(strsplit(tolower(gsub("[[:punct:]]", "", tmp2)), " "))
    kotoba <- kotoba[kotoba != ""]      # drop empty strings from splitting
    token.list <- sort(kotoba)
    type.list <- unique(token.list)
    token <- length(token.list)         # total running words
    type <- length(type.list)           # distinct words
    TTR <- type/token                   # type-token ratio (NaN if no words)
    paste(i, type, token, TTR, sep = "\t")
  }, character(1), USE.NAMES = FALSE)
} # end of user-defined function
# Compute the TTR indices for the native-speaker (NS) essays.
# NOTE(review): the relative setwd() assumes each chunk starts from the
# project root (knitr resets the working directory per chunk) — confirm.
setwd("NICER_NS")
result <- myTTR() # store the TTR results
head(result)
## [1] "NS501.txt\t359\t736\t0.487771739130435"
## [2] "NS502.txt\t340\t636\t0.534591194968553"
## [3] "NS503.txt\t353\t834\t0.42326139088729"
## [4] "NS504.txt\t336\t824\t0.407766990291262"
## [5] "NS505.txt\t393\t898\t0.437639198218263"
## [6] "NS506.txt\t339\t829\t0.408926417370326"
# Re-run in the NS folder and save the tab-separated results one level up.
# NOTE(review): each relative setwd("NICER_NS") assumes the chunk starts
# from the project root; run back-to-back in one session the second call
# would fail — confirm the intended Rmd chunk behavior.
setwd("NICER_NS")
result <- myTTR() # store the TTR results
write.table(result, "../TTR.NS.kekka.txt", col.names=F, row.names=F, quote = F)
# Read the saved file back in as a data frame (tab-delimited, no header).
setwd("NICER_NS")
TTR.NS.df <- read.delim("../TTR.NS.kekka.txt", header=F)
head(TTR.NS.df)
## V1 V2 V3 V4
## 1 NS501.txt 359 736 0.4877717
## 2 NS502.txt 340 636 0.5345912
## 3 NS503.txt 353 834 0.4232614
## 4 NS504.txt 336 824 0.4077670
## 5 NS505.txt 393 898 0.4376392
## 6 NS506.txt 339 829 0.4089264
# Learner (JPN) essays: parse the result strings directly with
# read.delim(text = ...) instead of writing them to a file first.
setwd("NICER_NNS")
result <- myTTR()
TTR.NNS.df <- read.delim(text = result, header = F)
head(TTR.NNS.df)
## V1 V2 V3 V4
## 1 JPN501.txt 134 319 0.4200627
## 2 JPN502.txt 158 351 0.4501425
## 3 JPN503.txt 121 201 0.6019900
## 4 JPN504.txt 139 260 0.5346154
## 5 JPN505.txt 174 417 0.4172662
## 6 JPN506.txt 123 260 0.4730769
# Illustrative template only (placeholder Japanese names "vector1..3",
# not runnable as-is): a data frame is built from equal-length vectors.
data.frame(ベクトル1, ベクトル2, ベクトル3)
myTTR.df <- function(){ # user-defined function: TTR statistics as a data frame
  # For each file in the current working directory, compute the token count,
  # type count and TTR, returning a data frame with one row per file and
  # columns fileV, tokenV, typeV, TTRV.
  file.zenbu <- list.files()
  if (length(file.zenbu) == 0) {
    # Same shape as the original empty result: a 0-column data frame.
    return(data.frame(fileV = NULL, tokenV = NULL, typeV = NULL, TTRV = NULL))
  }
  # One-row data frame per file, bound together at the end — avoids the
  # original's repeated c() growth of four parallel vectors.
  gyou <- lapply(file.zenbu, function(i) {
    yomikomi <- readLines(i, warn = FALSE)
    # Utterance lines only; strip the "*JPN501:<TAB>" speaker prefix.
    tmp1 <- grep("\\*(JPN|NS)", yomikomi, value = TRUE)
    tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1)
    # Normalise: drop punctuation, lower-case, split on spaces.
    kotoba <- unlist(strsplit(tolower(gsub("[[:punct:]]", "", tmp2)), " "))
    kotoba <- kotoba[kotoba != ""]
    token.list <- sort(kotoba)
    token <- length(token.list)           # running words
    type <- length(unique(token.list))    # distinct words
    data.frame(fileV = i,
               tokenV = token,
               typeV = type,
               TTRV = type/token)         # type-token ratio
  })
  do.call(rbind, gyou) # combine the per-file rows into one data frame
} # end of user-defined function
# Build the learner data frame with the new function and relabel columns.
setwd("NICER_NNS")
TTR.NNS.df <- myTTR.df()
head(TTR.NNS.df )
## fileV tokenV typeV TTRV
## 1 JPN501.txt 319 134 0.4200627
## 2 JPN502.txt 351 158 0.4501425
## 3 JPN503.txt 201 121 0.6019900
## 4 JPN504.txt 260 139 0.5346154
## 5 JPN505.txt 417 174 0.4172662
## 6 JPN506.txt 260 123 0.4730769
# Give the columns readable names.
names(TTR.NNS.df) <- c("ID", "Token", "Type", "TTR")
head(TTR.NNS.df)
## ID Token Type TTR
## 1 JPN501.txt 319 134 0.4200627
## 2 JPN502.txt 351 158 0.4501425
## 3 JPN503.txt 201 121 0.6019900
## 4 JPN504.txt 260 139 0.5346154
## 5 JPN505.txt 417 174 0.4172662
## 6 JPN506.txt 260 123 0.4730769
# Inspect the Token column as a plain vector (one value per essay).
TTR.NNS.df$Token
## [1] 319 351 201 260 417 260 355 195 260 183 419 398 336 190 268 324 326 255
## [19] 449 414 372 200 194 285 168 344 369 324 321 207 326 201 183 301 395 273
## [37] 448 192 187 284 211 305 453 225 576 315 218 475 385 240 249 234 183 344
## [55] 331 188 366 277 182 243 248 326 259 367 609 221 199 211 313 293 263 378
## [73] 211 248 320 331 265 273 249 196 117 303 328 542 277 189 309 260 195 350
## [91] 366 203 236 273 232 274 359 193 251 374 277 360 295 380 390 201 240 200
## [109] 358 284 290 283 207 250 278 223 275 298 269 263 258 274 224 297 191 430
## [127] 334 259 219 194 221 275 289 220 240 320 247 326 144 225 262 255 379 293
## [145] 378 425 307 307 224 470 516 184 322 374 433 381 397 456 166 274 487 333
## [163] 728 314 294 203 648 341 302 516 266 289 444 446 262 255 410 331 342 484
## [181] 116 497 551 275 286 297 242 426 353 288 447 390 266 403 233 270 370 535
## [199] 236 250 305 582 454 235 177 256 395 247 270 174 190 314 217 380 299 319
## [217] 331 322 392 255 85 188 414 221 321 312 188 301 169 356 346 150 205 237
## [235] 148 194 209 209 275 222 193 233 150 216 226 200 229 184 199 189 235 241
## [253] 326 183 325 208 282 306 174 194 329 156 213 219 270 306 153 210 119 230
## [271] 224 224 258 254 281 245 214 278 216 178 223 324 199 187 263 370 136 199
## [289] 213 188 139 197 135 210 242 192 259 223 130 352 218 266 195 320 167 170
## [307] 265 267 164 290 281 288 300 252 226 301 224 326 280 220 207 282 337 305
## [325] 418 152 410 251 276 323 203 252 367 235 196 244 234 301 267 138 269 267
## [343] 197 147 237 254 264 244 239 177 202 227 134 219 142 235 332 213 298 199
## [361] 204 180 274 199 275 168 265 204 250 192 188 340 265 177 223 124 207 264
## [379] 212 156 134
# Scatterplots: Token alone, then each pair of indices against each other.
plot(TTR.NNS.df$Token)
plot(TTR.NNS.df$Token, TTR.NNS.df$Type)
plot(TTR.NNS.df$Token, TTR.NNS.df$TTR)
plot(TTR.NNS.df$Type, TTR.NNS.df$TTR)
myGI.df <- function(){ # user-defined function: adds the Guiraud index (GI)
  # Same per-file statistics as myTTR.df() plus GI = type / sqrt(token),
  # a length-corrected measure of lexical diversity.  Returns a data frame
  # with columns fileV, tokenV, typeV, TTRV, GIV (one row per file).
  file.zenbu <- list.files()
  if (length(file.zenbu) == 0) {
    # Same shape as the original empty result: a 0-column data frame.
    return(data.frame(fileV = NULL, tokenV = NULL, typeV = NULL,
                      TTRV = NULL, GIV = NULL))
  }
  # One-row data frame per file, bound at the end — no c() growth.
  gyou <- lapply(file.zenbu, function(i) {
    yomikomi <- readLines(i, warn = FALSE)
    # Utterance lines only; strip the "*JPN501:<TAB>" speaker prefix.
    tmp1 <- grep("\\*(JPN|NS)", yomikomi, value = TRUE)
    tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1)
    # Normalise: drop punctuation, lower-case, split on spaces.
    kotoba <- unlist(strsplit(tolower(gsub("[[:punct:]]", "", tmp2)), " "))
    kotoba <- kotoba[kotoba != ""]
    token.list <- sort(kotoba)
    token <- length(token.list)           # running words
    type <- length(unique(token.list))    # distinct words
    data.frame(fileV = i,
               tokenV = token,
               typeV = type,
               TTRV = type/token,         # type-token ratio
               GIV = type/sqrt(token))    # Guiraud index
  })
  do.call(rbind, gyou) # combine the per-file rows into one data frame
} # end of user-defined function
# Guiraud index for the learner essays.
setwd("NICER_NNS")
NNS.GI.df <- myGI.df()
names(NNS.GI.df) <- c("ID", "Token", "Type", "TTR", "GI") # rename the headings too
head(NNS.GI.df )
## ID Token Type TTR GI
## 1 JPN501.txt 319 134 0.4200627 7.502560
## 2 JPN502.txt 351 158 0.4501425 8.433416
## 3 JPN503.txt 201 121 0.6019900 8.534682
## 4 JPN504.txt 260 139 0.5346154 8.620414
## 5 JPN505.txt 417 174 0.4172662 8.520817
## 6 JPN506.txt 260 123 0.4730769 7.628136
# Check the structure: 381 essays, ID is character, the rest numeric.
str(NNS.GI.df)
## 'data.frame': 381 obs. of 5 variables:
## $ ID : chr "JPN501.txt" "JPN502.txt" "JPN503.txt" "JPN504.txt" ...
## $ Token: int 319 351 201 260 417 260 355 195 260 183 ...
## $ Type : int 134 158 121 139 174 123 149 97 103 99 ...
## $ TTR : num 0.42 0.45 0.602 0.535 0.417 ...
## $ GI : num 7.5 8.43 8.53 8.62 8.52 ...
plot(NNS.GI.df$Token, NNS.GI.df$GI)
# Leftover R Markdown chunk fence (kept as a comment):
#```{r, echo=F}
myNoS.df <- function(){ # user-defined function: adds the number of sentences
  # Same per-file statistics as myGI.df() plus NoS, the count of utterance
  # lines per file (used here as the number of sentences).  Returns a data
  # frame with columns fileV, tokenV, typeV, TTRV, GIV, NoSV.
  file.zenbu <- list.files()
  if (length(file.zenbu) == 0) {
    # Same shape as the original empty result: a 0-column data frame.
    return(data.frame(fileV = NULL, tokenV = NULL, typeV = NULL,
                      TTRV = NULL, GIV = NULL, NoSV = NULL))
  }
  # One-row data frame per file, bound at the end — no c() growth.
  gyou <- lapply(file.zenbu, function(i) {
    yomikomi <- readLines(i, warn = FALSE)
    # Utterance lines only; strip the "*JPN501:<TAB>" speaker prefix.
    tmp1 <- grep("\\*(JPN|NS)", yomikomi, value = TRUE)
    tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1)
    # Normalise: drop punctuation, lower-case, split on spaces.
    kotoba <- unlist(strsplit(tolower(gsub("[[:punct:]]", "", tmp2)), " "))
    kotoba <- kotoba[kotoba != ""]
    token.list <- sort(kotoba)
    token <- length(token.list)           # running words
    type <- length(unique(token.list))    # distinct words
    data.frame(fileV = i,
               tokenV = token,
               typeV = type,
               TTRV = type/token,         # type-token ratio
               GIV = type/sqrt(token),    # Guiraud index
               NoSV = length(tmp1))       # number of sentences (utterance lines)
  })
  do.call(rbind, gyou) # combine the per-file rows into one data frame
} # end of user-defined function
# Indices including the number of sentences (NoS) for the learner essays.
setwd("NICER_NNS")
NNS.Index.df <- myNoS.df()
names(NNS.Index.df) <- c("ID", "Token", "Type", "TTR", "GI", "NoS") # rename the headings too
head(NNS.Index.df )
## ID Token Type TTR GI NoS
## 1 JPN501.txt 319 134 0.4200627 7.502560 30
## 2 JPN502.txt 351 158 0.4501425 8.433416 29
## 3 JPN503.txt 201 121 0.6019900 8.534682 13
## 4 JPN504.txt 260 139 0.5346154 8.620414 27
## 5 JPN505.txt 417 174 0.4172662 8.520817 25
## 6 JPN506.txt 260 123 0.4730769 7.628136 20
# Leftover R Markdown chunk fence (kept as a comment):
#```{r, echo=F}
myASL.df <- function(){ # user-defined function: adds average sentence length
  # Same per-file statistics as myNoS.df() plus ASL = token / NoS,
  # the average sentence length in words.  Returns a data frame with
  # columns fileV, tokenV, typeV, TTRV, GIV, NoSV, ASLV.
  file.zenbu <- list.files()
  if (length(file.zenbu) == 0) {
    # Same shape as the original empty result: a 0-column data frame.
    return(data.frame(fileV = NULL, tokenV = NULL, typeV = NULL, TTRV = NULL,
                      GIV = NULL, NoSV = NULL, ASLV = NULL))
  }
  # One-row data frame per file, bound at the end — no c() growth.
  gyou <- lapply(file.zenbu, function(i) {
    yomikomi <- readLines(i, warn = FALSE)
    # Utterance lines only; strip the "*JPN501:<TAB>" speaker prefix.
    tmp1 <- grep("\\*(JPN|NS)", yomikomi, value = TRUE)
    tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1)
    # Normalise: drop punctuation, lower-case, split on spaces.
    kotoba <- unlist(strsplit(tolower(gsub("[[:punct:]]", "", tmp2)), " "))
    kotoba <- kotoba[kotoba != ""]
    token.list <- sort(kotoba)
    token <- length(token.list)           # running words
    type <- length(unique(token.list))    # distinct words
    NoS <- length(tmp1)                   # number of sentences (utterance lines)
    data.frame(fileV = i,
               tokenV = token,
               typeV = type,
               TTRV = type/token,         # type-token ratio
               GIV = type/sqrt(token),    # Guiraud index
               NoSV = NoS,
               ASLV = token/NoS)          # total words divided by sentences
  })
  do.call(rbind, gyou) # combine the per-file rows into one data frame
} # end of user-defined function
# Indices including average sentence length (ASL) for the learner essays.
setwd("NICER_NNS")
NNS.Index.df <- myASL.df()
names(NNS.Index.df) <- c("ID", "Token", "Type", "TTR", "GI", "NoS", "ASL") # rename the headings too
head(NNS.Index.df )
## ID Token Type TTR GI NoS ASL
## 1 JPN501.txt 319 134 0.4200627 7.502560 30 10.63333
## 2 JPN502.txt 351 158 0.4501425 8.433416 29 12.10345
## 3 JPN503.txt 201 121 0.6019900 8.534682 13 15.46154
## 4 JPN504.txt 260 139 0.5346154 8.620414 27 9.62963
## 5 JPN505.txt 417 174 0.4172662 8.520817 25 16.68000
## 6 JPN506.txt 260 123 0.4730769 7.628136 20 13.00000
# NOTE(review): demonstration of paste(collapse=) only — relies on the
# leftover global `token` from a previous run; not part of the analysis.
paste(token, collapse="")
# Leftover R Markdown chunk fence (kept as a comment):
#```{r, echo=F}
myAWL.df <- function(){ # user-defined function: adds average word length
  # Same per-file statistics as myASL.df() plus AWL: total letters divided
  # by the number of words.  Returns a data frame with columns
  # fileV, tokenV, typeV, TTRV, GIV, NoSV, ASLV, AWLV.
  file.zenbu <- list.files()
  if (length(file.zenbu) == 0) {
    # Same shape as the original empty result: a 0-column data frame.
    return(data.frame(fileV = NULL, tokenV = NULL, typeV = NULL, TTRV = NULL,
                      GIV = NULL, NoSV = NULL, ASLV = NULL, AWLV = NULL))
  }
  # One-row data frame per file, bound at the end — no c() growth.
  gyou <- lapply(file.zenbu, function(i) {
    yomikomi <- readLines(i, warn = FALSE)
    # Utterance lines only; strip the "*JPN501:<TAB>" speaker prefix.
    tmp1 <- grep("\\*(JPN|NS)", yomikomi, value = TRUE)
    tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1)
    # Normalise: drop punctuation, lower-case, split on spaces.
    kotoba <- unlist(strsplit(tolower(gsub("[[:punct:]]", "", tmp2)), " "))
    kotoba <- kotoba[kotoba != ""]
    token.list <- sort(kotoba)
    token <- length(token.list)           # running words
    type <- length(unique(token.list))    # distinct words
    NoS <- length(tmp1)                   # number of sentences (utterance lines)
    # Concatenate every token and count the characters = total letters.
    mojisuu <- nchar(paste(token.list, collapse=""))
    data.frame(fileV = i,
               tokenV = token,
               typeV = type,
               TTRV = type/token,         # type-token ratio
               GIV = type/sqrt(token),    # Guiraud index
               NoSV = NoS,
               ASLV = token/NoS,          # average sentence length
               AWLV = mojisuu/token)      # letters divided by words
  })
  do.call(rbind, gyou) # combine the per-file rows into one data frame
} # end of user-defined function
# Indices including average word length (AWL) for the learner essays.
setwd("NICER_NNS")
NNS.Index.df <- myAWL.df()
names(NNS.Index.df) <- c("ID", "Token", "Type", "TTR", "GI", "NoS", "ASL", "AWL") # rename the headings too
head(NNS.Index.df )
## ID Token Type TTR GI NoS ASL AWL
## 1 JPN501.txt 319 134 0.4200627 7.502560 30 10.63333 4.304075
## 2 JPN502.txt 351 158 0.4501425 8.433416 29 12.10345 4.293447
## 3 JPN503.txt 201 121 0.6019900 8.534682 13 15.46154 4.746269
## 4 JPN504.txt 260 139 0.5346154 8.620414 27 9.62963 4.765385
## 5 JPN505.txt 417 174 0.4172662 8.520817 25 16.68000 4.023981
## 6 JPN506.txt 260 123 0.4730769 7.628136 20 13.00000 4.088462
# Pairwise scatterplot matrix of all numeric indices (ID column dropped).
pairs(NNS.Index.df[,-1])
Each NICER file begins with header lines like these:
@Topic: education
@EnglishEssay: 4
@SelfEval: 3
@TopicEase: 3
@EssayTraining: 5
@Proctor: 1
@Criterion: 6
Take the value from the "@Topic:" line for the essay topic, and convert
the "@Criterion:" score line to a whole number with as.integer().
#```{r, echo=F}
myIndex.df <- function(){ # 独自の命令の名前は変えておきましょう
topicV <- NULL # topic用
scoreV <- NULL # score用
fileV <- NULL
typeV <- NULL
tokenV <- NULL
TTRV <- NULL
GIV <- NULL
NoSV <- NULL
ASLV <- NULL
AWLV <- NULL
file.zenbu <- list.files() #
ruiseki <- "" #
for (i in file.zenbu){ #
yomikomi <- readLines(i, warn=F) #
topic.tmp <- grep("@Topic:", yomikomi, value=T) # Topicの行
topic <- gsub("@Topic:\t", "", topic.tmp) # 不要部分削除
score.tmp <- grep("@Criterion", yomikomi, value=T) # Scoreの行
score <- gsub("@Criterion:\t", "", score.tmp) # 不要部分削除
tmp1 <- grep("\\*(JPN|NS)", yomikomi, value=T) #
tmp2 <- gsub("\\*(JPN|NS)...:\t", "", tmp1) #
tmp2b <- gsub("[[:punct:]]", "", tmp2) #
tmp2c <- tolower(tmp2b) #
tmp3 <- strsplit(tmp2c, " ") #
tmp4 <- unlist(tmp3) #
tmp4 <- tmp4[tmp4 != ""] #
token.list <- sort(tmp4) #
type.list <- unique(token.list) #
token <- length(token.list) #
type <- length(type.list) #
TTR <- type/token #
GI <- type/sqrt(token)
NoS <- length(tmp1)
ASL <- token/NoS
mojiretu <- paste(token.list, collapse="") #
mojisuu <- nchar(mojiretu) #
AWL <- mojisuu/token #
score <- as.integer(score) # scoreを整数に
# 各要素の種類ごとにベクトルを作成
topicV <- c(topicV, topic) # Topicの追加
scoreV <- c(scoreV, score) # Scoreの追加
fileV <- c(fileV, i)
tokenV <- c(tokenV, token)
typeV <- c(typeV, type)
TTRV <- c(TTRV, TTR)
GIV <- c(GIV, GI)
NoSV <- c(NoSV, NoS)
ASLV <- c(ASLV, ASL)
AWLV <- c(AWLV, AWL)
}
data.frame(fileV, topicV, scoreV, tokenV, typeV, TTRV, GIV, NoSV, ASLV, AWLV) # 追加修正
}
# Full index table (metadata + lexical measures) for the learner essays.
setwd("NICER_NNS")
NNS.Index.df <- myIndex.df()
names(NNS.Index.df) <- c("ID", "Topic", "Score", "Token", "Type", "TTR", "GI", "NoS", "ASL", "AWL") # rename the headings too
head(NNS.Index.df)
# Is essay length (in sentences) related to length in words?
plot(NNS.Index.df$NoS, NNS.Index.df$Token)
cor.test(NNS.Index.df$NoS, NNS.Index.df$Token)
##
## Pearson's product-moment correlation
##
## data: NNS.Index.df$NoS and NNS.Index.df$Token
## t = 23.697, df = 379, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7287991 0.8102583
## sample estimates:
## cor
## 0.7726906