# R / R.scripts  (category: CHILDES)
#
# Helpers for pulling the analysis codes stored on the %co2 dependent tier
# out of CHAT transcript files.
#
# Example CHAT input (tier label and content are separated by a tab):
#   *CHI:   ringo wa +...
#   %xmor:  n|ringo=apple ptl:top|wa=TOP +...
#   %co2:   NP[N P]
#   %ort:   リンゴは .

# Shared helpers ----

# List the regular files (sub-directories are skipped) directly inside `path`.
co2_input_files <- function(path = ".") {
  files <- list.files(path, full.names = TRUE)
  files[!dir.exists(files)]
}

# Return the non-empty %co2 tier contents found in the lines of one file.
co2_extract_codes <- function(lines) {
  tier <- grep("^%co2", lines, value = TRUE)
  codes <- gsub("%co2:\t", "", tier)
  # Collapse runs of whitespace into a single space.
  codes <- gsub("\\s+", " ", codes)
  codes[codes != ""]
}

# Return the age field that follows "CHI|" on the @ID header line.
# Yields "" when there is no such line; when several lines match, the first
# one is used so the result is always a single string.
co2_child_age <- function(lines) {
  id_line <- grep("^@ID:\t.*CHI", lines, value = TRUE)
  age <- gsub("^@ID:\t.*CHI\\|", "", id_line)
  # Drop everything from the next "|" onwards, leaving only the age field.
  age <- gsub("\\|.+", "", age)
  if (length(age) == 0L) "" else age[[1L]]
}

# co2list ----

#' Extract the analysis codes (%co2 tier) from CHAT files
#'
#' Reads every file in `path`, collects the contents of all lines starting
#' with `%co2`, and returns them sorted.
#'
#' @param path Directory holding the CHAT files. Defaults to the current
#'   working directory, which is what the function always used originally.
#' @return A data frame with one column, `code.list`, holding one code per
#'   row (zero rows when no code is found).
#'
#' Example:
#'   > head(co2result)
#'     code.list
#'   1   NP[A N]
#'   2   NP[A N]
#'   3   NP[A N]
#'   4   NP[A N]
#'   5   NP[A N]
#'   6   NP[A N]
co2list <- function(path = ".") {
  per_file <- lapply(co2_input_files(path), function(file) {
    # readLines() is silent, unlike scan(), which reports "Read N items".
    co2_extract_codes(readLines(file, warn = FALSE))
  })
  # as.character() keeps the result a character vector even with no files.
  codes <- sort(as.character(unlist(per_file)))
  data.frame(code.list = codes)
}

# co2list2 ----

#' Extract the analysis codes together with the child's age
#'
#' Like `co2list()`, but every code is prefixed with the age taken from the
#' file's `@ID` header line for CHI, separated by a tab.
#'
#' @param path Directory holding the CHAT files. Defaults to the current
#'   working directory.
#' @return A data frame with one column, `code.list`, whose values have the
#'   form "<age>\t<code>", sorted. Files without any %co2 code contribute no
#'   rows.
#'
#' Example (print() shows the tab as \t):
#'   > head(co2result2)
#'                code.list
#'   1 1;11.29\tUN[N A p]
#'   2   2;00.05\tNP[A N]
#'   3   2;01.03\tUN[N N]
#'   4   2;02.00\tSP[A p]
#'   5   2;02.00\tSP[A p]
#'   6   2;02.00\tSP[N p]
co2list2 <- function(path = ".") {
  per_file <- lapply(co2_input_files(path), function(file) {
    lines <- readLines(file, warn = FALSE)
    codes <- co2_extract_codes(lines)
    # paste() treats a zero-length argument as "", so without this guard a
    # file that has an age but no codes would produce a bogus "<age>\t" row.
    if (length(codes) == 0L) {
      return(character(0))
    }
    paste(co2_child_age(lines), codes, sep = "\t")
  })
  codes <- sort(as.character(unlist(per_file)))
  data.frame(code.list = codes)
}