R R.scripts
!!!myIndexNICER3
{{outline}}
!ファイル名・スコア・言語指標を取り出す
*NICERのCHATフォーマットのデータの入っているフォルダー内で、
*各種言語指標とファイル名とエッセイのスコアを取り出して、「リスト」として出力する。
{{pre
# myIndexNICER3: collect the file name, essay score and lexical indices
# for every NICER (CHAT format) file in a folder.
#
# Args:
#   dir    Folder holding the CHAT files (default: working directory).
#   window MATTR window size in tokens (default: 100).
#
# Returns:
#   A list with one element per file. Each element is an unnamed list of
#   10 items, in this order: file name, Criterion score (character, NA if
#   the file has no @Criterion line), Token, Type, NoS, TTR, GI, MATTR,
#   AWL, ASL. matrix(unlist(result), nrow = 10) gives one column per file.
myIndexNICER3 <- function(dir = ".", window = 100) {
  stopifnot(is.numeric(window), length(window) == 1L, window >= 1)

  files <- list.files(dir)
  # list.files() also lists sub-directories, which scan() cannot read.
  files <- files[!dir.exists(file.path(dir, files))]

  speaker_pattern <- "\\*(JPN|NS)...:\t"
  result <- vector("list", length(files))

  for (k in seq_along(files)) {
    file_name <- files[k]
    lines_tmp <- scan(file.path(dir, file_name), what = character(),
                      sep = "\n", quiet = TRUE)

    # Criterion score: keep exactly one value so that every record has
    # 10 fields even when the header line is missing or repeated.
    criterion <- grep("@Criterion", lines_tmp, value = TRUE)
    if (length(criterion) > 0) {
      score <- gsub("@Criterion:\t", "", criterion[1])
    } else {
      score <- NA_character_
    }

    # Essay body: only the speaker tiers (*JPNxxx: / *NSxxx:), prefix removed.
    body <- grep(speaker_pattern, lines_tmp, value = TRUE)
    body <- gsub(speaker_pattern, "", body)
    body <- body[body != ""]

    # Tokenise; strsplit() leaves "" when a line starts with punctuation,
    # and that empty string must not be counted as a word.
    words <- unlist(strsplit(tolower(body), "\\W+"))
    words <- words[nzchar(words)]

    token <- length(words)
    type <- length(unique(words))
    # Number of sentences = number of speaker lines. Earlier versions used
    # the line count of the whole file (headers and % tiers included), so
    # NoS / ASL values produced by them are not comparable with these.
    nos <- length(body)

    if (token > 0) {
      ttr <- type / token
      gi <- type / sqrt(token)
      awl <- sum(nchar(words)) / token
      asl <- token / nos

      # MATTR over a circular text: one window starts at every token and
      # wraps around to the beginning. The window is capped at the text
      # length so that short essays are not divided by an oversized window.
      w <- min(window, token)
      offsets <- seq_len(w) - 1
      window_ttr <- vapply(seq_len(token), function(j) {
        idx <- (j - 1 + offsets) %% token + 1
        length(unique(words[idx])) / w
      }, numeric(1))
      mattr <- sum(window_ttr) / token
    } else {
      # No words at all: report NA instead of NaN / garbage ratios.
      ttr <- NA_real_
      gi <- NA_real_
      awl <- NA_real_
      asl <- NA_real_
      mattr <- NA_real_
    }

    result[[k]] <- list(file_name, score, token, type, nos,
                        ttr, gi, mattr, awl, asl)
  }

  result
}
}}
!その後の処理
*unlistして、matrixとして一覧表にする。
 > matrix(unlist(JP_result), nrow=10, ncol=50)
*縦横変換 t()
{{pre
> t(matrix(unlist(JP_result), nrow=10, ncol=50))
 [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] "JPN501.txt" "4" "319" "135" "123" "0.423197492163009" "7.55854889790403" "0.592131661442006" "4.30407523510972" "2.59349593495935"
[2,] "JPN502.txt" "4" "356" "161" "120" "0.452247191011236" "8.5329829340512" "0.664915730337079" "4.23314606741573" "2.96666666666667"
[3,] "JPN503.txt" "3" "201" "121" "70" "0.601990049751244" 
"8.53468195188904" "0.717014925373134" "4.74626865671642" "2.87142857142857" [4,] "JPN504.txt" "4" "260" "140" "114" "0.538461538461538" "8.68243142124459" "0.687769230769229" "4.76153846153846" "2.28070175438596" [5,] "JPN505.txt" "4" "420" "175" "106" "0.416666666666667" "8.53912563829967" "0.634190476190476" "3.9952380952381" "3.9622641509434" [6,] "JPN506.txt" "3" "261" "124" "93" "0.475095785440613" "7.67540731131814" "0.639003831417626" "4.0727969348659" "2.80645161290323" }} *データフレームに {{pre > as.data.frame(t(matrix(unlist(JP_result), nrow=10, ncol=50))) V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 1 JPN501.txt 4 319 135 123 0.423197492163009 7.55854889790403 0.592131661442006 4.30407523510972 2.59349593495935 2 JPN502.txt 4 356 161 120 0.452247191011236 8.5329829340512 0.664915730337079 4.23314606741573 2.96666666666667 3 JPN503.txt 3 201 121 70 0.601990049751244 8.53468195188904 0.717014925373134 4.74626865671642 2.87142857142857 4 JPN504.txt 4 260 140 114 0.538461538461538 8.68243142124459 0.687769230769229 4.76153846153846 2.28070175438596 5 JPN505.txt 4 420 175 106 0.416666666666667 8.53912563829967 0.634190476190476 3.9952380952381 3.9622641509434 6 JPN506.txt 3 261 124 93 0.475095785440613 7.67540731131814 0.639003831417626 4.0727969348659 2.80645161290323 }}