# Build a word-frequency table from a plain-text file.
#
# Args:
#   file: path of the text file to read. Defaults to an interactive
#         choose.files() dialog, which is what the function always used before.
# Returns:
#   A table (dimension name "token") with one count per lower-cased word.
myWordListG <- function(file = choose.files()) {
  # sep = "\n" makes scan() return one element per line of the file.
  lines.tmp <- scan(file, what = "char", sep = "\n")
  data.tmp <- lines.tmp[lines.tmp != ""]
  # Replace punctuation by spaces, squeeze runs of spaces, lower-case.
  tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
  tmp5 <- gsub(" +", " ", tmp4)
  tmp6 <- tolower(tmp5)
  tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
  # A line that starts with punctuation or a space makes strsplit() emit a
  # leading "" element; drop those so they are not counted as a word.
  token <- sort(tmp8[tmp8 != ""])
  table(token)
}
# Build the frequency table once (this opens the file dialog) and keep the
# result: `token` exists only inside myWordListG(), so the table it returns is
# what has to be sorted and plotted here.
word.freq <- myWordListG()
word.freq
# Ascending and descending frequency lists.
sort(word.freq)
sort(word.freq, decreasing = TRUE)
# Plot / list the most frequent words (head() defaults to the first 6).
plot(head(sort(word.freq, decreasing = TRUE)))
head(sort(word.freq, decreasing = TRUE), 20)
plot(head(sort(word.freq, decreasing = TRUE), 20))
# Guiraud's index of lexical richness: number of types / sqrt(number of tokens).
#
# Args:
#   d: character vector of text (one or more strings).
# Returns:
#   A single number; NaN when d contains no words at all (0 / sqrt(0)).
myGI <- function(d) {
  # Punctuation -> space, squeeze repeated spaces, lower-case, split on spaces.
  tmp4 <- gsub("[[:punct:]]", " ", d)
  tmp5 <- gsub(" +", " ", tmp4)
  tmp6 <- tolower(tmp5)
  tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
  # strsplit() yields a leading "" when a string begins with a space (e.g. after
  # an opening quote was replaced); such empty strings are not words.
  token <- tmp8[tmp8 != ""]
  type <- unique(token)
  # Type/token ratio would be: length(type) / length(token)
  length(type) / sqrt(length(token))
}
# Evaluate myGI() on the text vectors ns501.data ... ns510.data, one call each.
# NOTE(review): the ns5xx.data objects are not created in this part of the file;
# presumably they were loaded earlier in the session - confirm.
# ns502.data is evaluated three times in total (the first call and two repeats).
myGI(ns502.data)
myGI(ns501.data)
myGI(ns502.data)
myGI(ns502.data)
myGI(ns503.data)
myGI(ns504.data)
myGI(ns505.data)
myGI(ns506.data)
myGI(ns507.data)
myGI(ns508.data)
myGI(ns509.data)
myGI(ns510.data)
# for (条件) {
#   すること
#   すること
# }
# 例: i in 1:19
#   1から19まで、順に i に入れる
# Store the names returned by list.files() for the current working directory.
files <- list.files()
# 例: i in files
#   filesの中の個々のファイルを、順に i に入れる
# Read a file with myData() and pass the extracted text straight to myGI().
# NOTE(review): myData() is defined only after this line, so when the file is
# run top to bottom this call works only if myData() already exists.
myGI(myData())
# Extract the text of the body lines of a transcript-style file.
#
# A body line is one that starts with "*", one or more word characters, ":" and
# a tab; that prefix is stripped and lines left empty are dropped.
#
# Args:
#   file: path of the file to read. Defaults to an interactive choose.files()
#         dialog, which is what the function always used before.
# Returns:
#   Character vector with one element per non-empty body line. (The original
#   ended on an assignment and therefore returned the same value invisibly.)
myData <- function(file = choose.files()) {
  # sep = "\n" makes scan() return one element per line of the file.
  lines.tmp <- scan(file, what = "char", sep = "\n")
  body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
  data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
  data.tmp[data.tmp != ""]
}
# Guiraud's index of lexical richness: number of types / sqrt(number of tokens).
#
# Args:
#   d: character vector of text (one or more strings).
# Returns:
#   A single number; NaN when d contains no words at all (0 / sqrt(0)).
myGI <- function(d) {
  # Punctuation -> space, squeeze repeated spaces, lower-case, split on spaces.
  tmp4 <- gsub("[[:punct:]]", " ", d)
  tmp5 <- gsub(" +", " ", tmp4)
  tmp6 <- tolower(tmp5)
  tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
  # A string that begins with a space (e.g. where an opening quote used to be)
  # makes strsplit() return a leading ""; empty strings are not words.
  token <- tmp8[tmp8 != ""]
  type <- unique(token)
  # Type/token ratio would be: length(type) / length(token)
  length(type) / sqrt(length(token))
}
# Read one transcript-style file and return Guiraud's index
# (types / sqrt(tokens)) of its body text.
#
# Args:
#   file: path of the file to read. Defaults to an interactive choose.files()
#         dialog, which is what the function always used before.
# Returns:
#   A single number; NaN when the file has no body words.
myGI2 <- function(file = choose.files()) {
  # --- myData part: keep lines starting with "*<word>:<tab>" and strip the prefix
  lines.tmp <- scan(file, what = "char", sep = "\n")
  body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
  data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
  data.tmp <- data.tmp[data.tmp != ""]
  # --- myGI part: works on data.tmp produced by the myData part above
  tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
  tmp5 <- gsub(" +", " ", tmp4)
  tmp6 <- tolower(tmp5)
  tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
  # Drop the "" that strsplit() emits for lines beginning with a space, so it
  # is not counted as a word.
  token <- tmp8[tmp8 != ""]
  type <- unique(token)
  # Type/token ratio would be: length(type) / length(token)
  length(type) / sqrt(length(token))
}
# Change into the corpus folder and run myGI2() on an interactively chosen file.
# The path is relative, so this only works from the folder that contains
# NICER1_3; the working directory stays changed afterwards.
setwd("NICER1_3/NICER_NS")
myGI2()
# Output recorded for the file that was chosen in that run:
## [1] 13.1793
# Return Guiraud's index (types / sqrt(tokens)) of the body text of one file.
#
# Args:
#   F: path of the file to read (replaces the choose.files() dialog of myGI2).
# Returns:
#   A single number; NaN when the file has no body words.
myGI2F <- function(F) {
  # --- myData part: keep lines starting with "*<word>:<tab>" and strip the prefix
  lines.tmp <- scan(F, what = "char", sep = "\n")
  # value = TRUE is spelled out: the parameter F shadows the FALSE shorthand
  # here, which shows why the T/F abbreviations are unsafe.
  body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
  data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
  data.tmp <- data.tmp[data.tmp != ""]
  # --- myGI part
  tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
  tmp5 <- gsub(" +", " ", tmp4)
  tmp6 <- tolower(tmp5)
  tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
  # Drop the "" that strsplit() emits for lines beginning with a space, so it
  # is not counted as a word.
  token <- tmp8[tmp8 != ""]
  type <- unique(token)
  # Type/token ratio would be: length(type) / length(token)
  length(type) / sqrt(length(token))
}
# Change into the corpus folder (relative path) and print the myGI2F() value of
# every entry that list.files() returns there.
setwd("NICER1_3/NICER_NS")
files <- list.files()
for (i in files) {
gi <- myGI2F(i)
cat(gi, "\n") # inside a for loop, print the result explicitly with cat()
# "\n" is appended so that each result is followed by a line break.
}
# Print the myGI2F() value of every entry that list.files() reports for the
# current working directory, one value per line.
myGIfiles <- function() {
  targets <- list.files()
  for (idx in seq_along(targets)) {
    cat(myGI2F(targets[idx]), "\n")
  }
}
# Change into the corpus folder (relative path; the working directory stays
# changed afterwards) and print the index for every file found there.
setwd("NICER1_3/NICER_NS")
myGIfiles()
# Print Guiraud's index (types / sqrt(tokens)) for every file in a directory,
# one value per line.
#
# Args:
#   dir: directory whose files are processed. Defaults to the current working
#        directory, which is what the function always used before.
# Returns:
#   NULL, invisibly; the values are written with cat().
myGIfiles2 <- function(dir = ".") {
  files <- list.files(dir, full.names = TRUE)
  # list.files() also lists sub-directories, which scan() cannot read.
  files <- files[!dir.exists(files)]
  for (i in files) {
    # --- myData part: keep lines starting with "*<word>:<tab>", strip the prefix.
    # quiet = TRUE keeps scan()'s "Read N items" messages out of the result list.
    lines.tmp <- scan(i, what = "char", sep = "\n", quiet = TRUE)
    body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
    data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
    data.tmp <- data.tmp[data.tmp != ""]
    # --- myGI part
    tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
    tmp5 <- gsub(" +", " ", tmp4)
    tmp6 <- tolower(tmp5)
    tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
    # Drop the "" produced for lines that begin with a space; it is not a word.
    token <- tmp8[tmp8 != ""]
    type <- unique(token)
    gi <- length(type) / sqrt(length(token)) # the result is stored in gi
    cat(gi, "\n")
  }
}
# Change into the corpus folder (relative path; the working directory stays
# changed afterwards) and print the index for every file found there.
setwd("NICER1_3/NICER_NS")
myGIfiles2()
# Print the type/token ratio (types / tokens) for every file in a directory,
# one value per line.
#
# Args:
#   dir: directory whose files are processed. Defaults to the current working
#        directory, which is what the function always used before.
# Returns:
#   NULL, invisibly; the values are written with cat().
myTTRfiles <- function(dir = ".") {
  files <- list.files(dir, full.names = TRUE)
  # list.files() also lists sub-directories, which scan() cannot read.
  files <- files[!dir.exists(files)]
  for (i in files) {
    # --- myData part: keep lines starting with "*<word>:<tab>", strip the prefix.
    # quiet = TRUE keeps scan()'s "Read N items" messages out of the result list.
    lines.tmp <- scan(i, what = "char", sep = "\n", quiet = TRUE)
    body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
    data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
    data.tmp <- data.tmp[data.tmp != ""]
    # --- tokenising part (same as myGI)
    tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
    tmp5 <- gsub(" +", " ", tmp4)
    tmp6 <- tolower(tmp5)
    tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
    # Drop the "" produced for lines that begin with a space; it is not a word.
    token <- tmp8[tmp8 != ""]
    type <- unique(token)
    ttr <- length(type) / length(token) # only this formula differs from the GI version
    cat(ttr, "\n")
  }
}
# Change into the corpus folder (relative path; the working directory stays
# changed afterwards) and print the type/token ratio for every file found there.
setwd("NICER1_3/NICER_NS")
myTTRfiles()
# Print vocabulary measures for every file in a directory, one line per file:
#   number of types, number of tokens, type/token ratio, Guiraud's index.
#
# Args:
#   dir: directory whose files are processed. Defaults to the current working
#        directory, which is what the function always used before.
# Returns:
#   NULL, invisibly; the values are written with cat().
myVoc <- function(dir = ".") {
  files <- list.files(dir, full.names = TRUE)
  # list.files() also lists sub-directories, which scan() cannot read.
  files <- files[!dir.exists(files)]
  for (i in files) {
    # Keep lines starting with "*<word>:<tab>" and strip that prefix.
    # quiet = TRUE keeps scan()'s "Read N items" messages out of the result list.
    lines.tmp <- scan(i, what = "char", sep = "\n", quiet = TRUE)
    body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
    data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
    data.tmp <- data.tmp[data.tmp != ""]
    # Punctuation -> space, squeeze spaces, lower-case, split into words.
    tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
    tmp5 <- gsub(" +", " ", tmp4)
    tmp6 <- tolower(tmp5)
    tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
    # Drop the "" produced for lines that begin with a space; it is not a word.
    token <- tmp8[tmp8 != ""]
    type <- unique(token)
    ttr <- length(type) / length(token)
    gi <- length(type) / sqrt(length(token))
    cat(length(type), length(token), ttr, gi, "\n")
  }
}
# Change into the corpus folder (relative path; the working directory stays
# changed afterwards) and print the vocabulary measures for every file there.
setwd("NICER1_3/NICER_NS")
myVoc()
# nchar() counts the characters of a string; a space is counted as well.
nchar("internationalization")
## [1] 20
nchar("two words")
## [1] 9
nchar("twowords")
## [1] 8
# paste(..., collapse = "") joins the elements of a vector into one string.
words <- c("two", "words")
paste(words, collapse="")
## [1] "twowords"
# Split the text on single non-word characters, glue the pieces together and
# count the characters of the result.
token <- unlist(strsplit(ns502.data, "\\W"))
tmp <- paste(token, collapse = "")
nchar(tmp)
# Average word length: total number of word characters / number of words.
#
# Args:
#   d: character vector of text.
# Returns:
#   A single number; NaN when d contains no words.
myAWL <- function(d) {
  # Split on runs of non-word characters. Splitting on a single "\\W" would
  # create an empty token between adjacent separators such as ", " and inflate
  # the word count; a leading separator still yields one "", removed below.
  token <- as.character(unlist(strsplit(d, "\\W+")))
  token <- token[token != ""]
  tmp <- paste(token, collapse = "")
  nchar(tmp) / length(token)
}
# Average word length of ns502.data, then the number of elements in that vector.
myAWL(ns502.data)
length(ns502.data)
# Average sentence length: number of words / number of elements of d.
# Each element of d is counted as one sentence.
#
# Args:
#   d: character vector of text.
# Returns:
#   A single number; NaN when d is empty.
myASL <- function(d) {
  # Split on runs of non-word characters and drop empty strings, so that
  # adjacent separators such as ", " do not add phantom words to the count.
  token <- as.character(unlist(strsplit(d, "\\W+")))
  token <- token[token != ""]
  length(token) / length(d)
}
# Average number of words per element of ns502.data.
myASL(ns502.data)
# Flesch-Kincaid Grade Level = 0.39 × (語数 / 文数) + 11.8 × (シラブル数 / 語数) − 15.59
# Readability score: 0.39 * asl + 11.8 * awl - 15.59.
#
# The coefficients are those of the Flesch-Kincaid grade-level formula, but awl
# here is the average number of characters per word, not syllables per word.
#
# Args:
#   d: character vector of text; each element is counted as one sentence.
# Returns:
#   A single number; NaN when d contains no words.
myRD <- function(d) {
  # Split on runs of non-word characters and drop empty strings, so that
  # adjacent separators such as ", " do not add phantom words.
  token <- as.character(unlist(strsplit(d, "\\W+")))
  token <- token[token != ""]
  # Join the tokens of *this* call. The original pasted `tmp`, which is not
  # defined inside the function, so it either failed or silently used a global
  # `tmp` left over from other data.
  tmp <- paste(token, collapse = "")
  awl <- nchar(tmp) / length(token)
  asl <- length(token) / length(d)
  0.39 * asl + 11.8 * awl - 15.59
}
# Readability score of ns502.data.
myRD(ns502.data)
# 画面に結果が出力されるところまででよいです。
# 四捨五入のコマンドは round() です。小数点以下3桁目を四捨五入して2桁まで出すには、round(変数, digits = 2) とします。
# Print lexical and length measures for every file in a directory, one line per
# file: types, tokens, type/token ratio, Guiraud's index, average word length
# (characters per word) and average sentence length (words per body line).
#
# Args:
#   dir: directory whose files are processed. Defaults to the current working
#        directory, which is what the function always used before.
# Returns:
#   NULL, invisibly; the values are written with cat().
myIndexes <- function(dir = ".") {
  files <- list.files(dir, full.names = TRUE)
  # list.files() also lists sub-directories, which scan() cannot read.
  files <- files[!dir.exists(files)]
  for (i in files) {
    # Keep lines starting with "*<word>:<tab>" and strip that prefix.
    # quiet = TRUE keeps scan()'s "Read N items" messages out of the result list.
    lines.tmp <- scan(i, what = "char", sep = "\n", quiet = TRUE)
    body.tmp <- grep("^\\*\\w+:\t", lines.tmp, value = TRUE)
    data.tmp <- gsub("^\\*\\w+:\t", "", body.tmp)
    data.tmp <- data.tmp[data.tmp != ""]
    # Punctuation -> space, squeeze spaces, lower-case, split into words.
    tmp4 <- gsub("[[:punct:]]", " ", data.tmp)
    tmp5 <- gsub(" +", " ", tmp4)
    tmp6 <- tolower(tmp5)
    tmp8 <- as.character(unlist(strsplit(tmp6, " ")))
    # Drop the "" produced for lines that begin with a space; it is not a word
    # and would distort every measure below.
    token <- tmp8[tmp8 != ""]
    type <- unique(token)
    ttr <- length(type) / length(token)
    gi <- length(type) / sqrt(length(token))
    tmp <- paste(token, collapse = "")
    awl <- nchar(tmp) / length(token)
    # Each remaining body line is counted as one sentence.
    asl <- length(token) / length(data.tmp)
    cat(length(type), length(token), ttr, gi, awl, asl, "\n")
  }
}
# Change into the corpus folder (relative path; the working directory stays
# changed afterwards) and print all measures for every file found there.
setwd("NICER1_3/NICER_NS")
myIndexes()