#' Collect word tokens from speaker lines of transcript files.
#'
#' Reads each input file, keeps only the lines containing a speaker tag that
#' matches `\*(JPN|NS)...:` followed by a tab, strips the tag, lower-cases
#' the remaining text, splits it on non-word characters, and writes all
#' resulting tokens (sorted, one per line) to `output.file`.
#'
#' @param output.file Path of the file to write. When `NULL` (the default)
#'   the user is asked to pick a file interactively, as in the original
#'   zero-argument call.
#' @param files Character vector of input file paths. When `NULL` (the
#'   default) every regular file in the current working directory is read.
#' @return `NULL`, invisibly. The function is called for its side effect of
#'   writing `output.file`.
myAllTokens <- function(output.file = NULL, files = NULL) {
  # Speaker tag: "*JPN" or "*NS", any three characters, a colon, and a tab.
  speaker.pattern <- "\\*(JPN|NS)...:\t"

  if (is.null(output.file)) {
    # choose.files() only exists on Windows; file.choose() is the portable
    # fallback. multi = FALSE because exactly one output path is needed.
    output.file <- if (.Platform$OS.type == "windows") {
      utils::choose.files(multi = FALSE)
    } else {
      file.choose()
    }
  }
  if (length(output.file) != 1L || is.na(output.file) ||
        !nzchar(output.file)) {
    stop("No output file selected.", call. = FALSE)
  }

  if (is.null(files)) {
    files <- list.files()
    # list.files() also returns sub-directory names, which cannot be read.
    files <- files[!dir.exists(files)]
  }

  # Tokenise the speaker lines of a single file.
  tokenize.file <- function(path) {
    lines.tmp <- readLines(path, warn = FALSE)
    data.tmp <- grep(speaker.pattern, lines.tmp, value = TRUE)
    body.tmp <- gsub(speaker.pattern, "", data.tmp)
    body.token <- unlist(strsplit(tolower(body.tmp), "\\W"), use.names = FALSE)
    # Splitting on every non-word character leaves empty strings between
    # adjacent separators; drop them.
    body.token <- as.character(body.token)
    body.token[nzchar(body.token)]
  }

  # Build the result in one pass instead of growing a vector inside a loop.
  # as.character() turns the NULL produced by "no tokens at all" into
  # character(0), so no placeholder entry ever reaches the output.
  token.all <- as.character(
    unlist(lapply(files, tokenize.file), use.names = FALSE)
  )
  token.all <- sort(token.all)

  writeLines(token.all, con = output.file)
  invisible(NULL)
}