* using log directory 'd:/Rcompile/CRANpkg/local/4.5/topicmodels.etm.Rcheck' * using R version 4.5.1 (2025-06-13 ucrt) * using platform: x86_64-w64-mingw32 * R was compiled by gcc.exe (GCC) 14.2.0 GNU Fortran (GCC) 14.2.0 * running under: Windows Server 2022 x64 (build 20348) * using session charset: UTF-8 * checking for file 'topicmodels.etm/DESCRIPTION' ... OK * checking extension type ... Package * this is package 'topicmodels.etm' version '0.1.0' * package encoding: UTF-8 * checking package namespace information ... OK * checking package dependencies ... OK * checking if this is a source package ... OK * checking if there is a namespace ... OK * checking for hidden files and directories ... OK * checking for portable file names ... OK * checking whether package 'topicmodels.etm' can be installed ... OK * checking installed package size ... OK * checking package directory ... OK * checking DESCRIPTION meta-information ... OK * checking top-level files ... OK * checking for left-over files ... OK * checking index information ... OK * checking package subdirectories ... OK * checking code files for non-ASCII characters ... OK * checking R files for syntax errors ... OK * checking whether the package can be loaded ... [4s] OK * checking whether the package can be loaded with stated dependencies ... [4s] OK * checking whether the package can be unloaded cleanly ... [3s] OK * checking whether the namespace can be loaded with stated dependencies ... [4s] OK * checking whether the namespace can be unloaded cleanly ... [4s] OK * checking loading without being on the library search path ... [4s] OK * checking use of S3 registration ... OK * checking dependencies in R code ... OK * checking S3 generic/method consistency ... OK * checking replacement functions ... OK * checking foreign function calls ... OK * checking R code for possible problems ... [9s] OK * checking Rd files ... [1s] NOTE checkRd: (-1) ETM.Rd:33: Lost braces in \itemize; \value handles \item{}{} directly checkRd: (-1) ETM.Rd:34: Lost braces in \itemize; \value handles \item{}{} directly checkRd: (-1) ETM.Rd:35: Lost braces in \itemize; \value handles \item{}{} directly checkRd: (-1) ETM.Rd:36: Lost braces in \itemize; \value handles \item{}{} directly checkRd: (-1) ETM.Rd:37: Lost braces in \itemize; \value handles \item{}{} directly * checking Rd metadata ... OK * checking Rd cross-references ... OK * checking for missing documentation entries ... OK * checking for code/documentation mismatches ... OK * checking Rd \usage sections ... OK * checking Rd contents ... OK * checking for unstated dependencies in examples ... OK * checking contents of 'data' directory ... OK * checking data for non-ASCII characters ... [1s] OK * checking data for ASCII and uncompressed saves ... OK * checking examples ... [5s] ERROR Running examples in 'topicmodels.etm-Ex.R' failed The error most likely occurred in: > ### Name: ETM > ### Title: Topic Modelling in Semantic Embedding Spaces > ### Aliases: ETM > > ### ** Examples > > library(torch) > library(topicmodels.etm) > library(word2vec) > library(udpipe) > data(brussels_reviews_anno, package = "udpipe") > ## > ## Toy example with pretrained embeddings > ## > > ## a. build word2vec model > x <- subset(brussels_reviews_anno, language %in% "nl") > x <- paste.data.frame(x, term = "lemma", group = "doc_id") > set.seed(4321) > w2v <- word2vec(x = x$lemma, dim = 15, iter = 20, type = "cbow", min_count = 5) > embeddings <- as.matrix(w2v) > > ## b. build document term matrix on nouns + adjectives, align with the embedding terms > dtm <- subset(brussels_reviews_anno, language %in% "nl" & upos %in% c("NOUN", "ADJ")) > dtm <- document_term_frequencies(dtm, document = "doc_id", term = "lemma") > dtm <- document_term_matrix(dtm) > dtm <- dtm_conform(dtm, columns = rownames(embeddings)) > dtm <- dtm[dtm_rowsums(dtm) > 0, ] > > ## create and fit an embedding topic model - 8 topics, theta 100-dimensional > if (torch::torch_is_installed()) { + + set.seed(4321) + torch_manual_seed(4321) + model <- ETM(k = 8, dim = 100, embeddings = embeddings, dropout = 0.5) + optimizer <- optim_adam(params = model$parameters, lr = 0.005, weight_decay = 0.0000012) + overview <- model$fit(data = dtm, optimizer = optimizer, epoch = 40, batch_size = 1000) + scores <- predict(model, dtm, type = "topics") + + lastbatch <- subset(overview$loss, overview$loss$batch_is_last == TRUE) + plot(lastbatch$epoch, lastbatch$loss) + plot(overview$loss_test) + + ## show top words in each topic + terminology <- predict(model, type = "terms", top_n = 7) + terminology + + ## + ## Toy example without pretrained word embeddings + ## + set.seed(4321) + torch_manual_seed(4321) + model <- ETM(k = 8, dim = 100, embeddings = 15, dropout = 0.5, vocab = colnames(dtm)) + optimizer <- optim_adam(params = model$parameters, lr = 0.005, weight_decay = 0.0000012) + overview <- model$fit(data = dtm, optimizer = optimizer, epoch = 40, batch_size = 1000) + terminology <- predict(model, type = "terms", top_n = 7) + terminology + + + + ## Don't show: + ## + ## Another example using fit_original + ## + data(ng20, package = "topicmodels.etm") + vocab <- ng20$vocab + tokens <- ng20$bow_tr$tokens + counts <- ng20$bow_tr$counts + + torch_manual_seed(123456789) + model <- ETM(k = 4, vocab = vocab, dim = 5, embeddings = 25) + model + optimizer <- optim_adam(params = model$parameters, lr = 0.005, weight_decay = 0.0000012) + + traindata <- list(tokens = tokens, counts = counts, vocab = vocab) + test1 <- list(tokens = ng20$bow_ts_h1$tokens, counts = ng20$bow_ts_h1$counts, vocab = vocab) + test2 <- list(tokens = ng20$bow_ts_h2$tokens, counts = ng20$bow_ts_h2$counts, vocab = vocab) + + out <- model$fit_original(data = traindata, test1 = test1, test2 = test2, epoch = 4, + optimizer = optimizer, batch_size = 1000, + lr_anneal_factor = 4, lr_anneal_nonmono = 10) + test <- subset(out$loss, out$loss$batch_is_last == TRUE) + plot(test$epoch, test$loss) + + topic.centers <- as.matrix(model, type = "embedding", which = "topics") + word.embeddings <- as.matrix(model, type = "embedding", which = "words") + topic.terminology <- as.matrix(model, type = "beta") + + terminology <- predict(model, type = "terms", top_n = 4) + terminology + ## End(Don't show) + + } * checking for unstated dependencies in 'tests' ... OK * checking tests ... [1s] OK Running 'tinytest.R' [0s] * checking PDF version of manual ... [20s] OK * checking HTML version of manual ... [1s] OK * DONE Status: 1 ERROR, 1 NOTE