---
title: "Esperantisto"
output: html_notebook
---
```{r}
library(ghql)
library(dplyr)
library(jsonlite)
# Client for the GraphQL endpoint served on localhost:4000.
gql_conn <- GraphqlClient$new(url = "http://localhost:4000")
# Ask for every word together with its `fastrpEmbedding` field.
words_query <- '
query wordEmbeddings {
words {
word
fastrpEmbedding
}
}
'
# Register the query under the name "link" and read it back through the
# documented `queries` field instead of depending on whatever `$query()`
# happens to return. `rq` stays a named list, so `rq$link` keeps working.
query_set <- Query$new()
query_set$query("link", words_query)
rq <- query_set$queries
# `exec()` output is piped into fromJSON(); spell out FALSE because `F` is an
# ordinary variable that can be reassigned.
result <- gql_conn$exec(rq$link) %>%
  fromJSON(flatten = FALSE)
words <- result$data$words %>%
  as_tibble()
# XXX: We have duplicates! It seems we need to increase the number of
# dimensions during embedding...
# Keep only the first word for each distinct embedding.
deduped_words <- words %>%
  distinct(fastrpEmbedding, .keep_all = TRUE)
```
```{r}
library(tidyr)
# Shorten the embedding column name to `frp`, then spread it into one column
# per element. With names_sep = "" the new columns are named by pasting the
# outer name directly onto the inner name/index.
unnested_words <- deduped_words %>%
  rename(frp = fastrpEmbedding) %>%
  unnest_wider(frp, names_sep = "")
```
```{r}
library(Rtsne)
library(tidyverse)
# Give every row a stable integer key so the t-SNE coordinates can be joined
# back onto the words afterwards.
unnested_words_with_id <- mutate(unnested_words, ID = row_number())
# Fixed seed so repeated runs use the same random number stream.
set.seed(42)
tSNE_fit <- unnested_words_with_id %>%
  # Keep the numeric columns only (this still includes ID).
  select(where(is.numeric)) %>%
  # Move ID into the row names so it is not treated as a feature.
  column_to_rownames(var = "ID") %>%
  # Center and scale each remaining column before fitting.
  scale() %>%
  Rtsne()
```
```{r}
# Turn the coordinate matrix stored in `tSNE_fit$Y` into a data frame and
# attach the original word data to it.
tSNE_df <- as.data.frame(tSNE_fit$Y) %>%
  rename(
    tSNE1 = "V1",
    tSNE2 = "V2"
  ) %>%
  # Rebuild the row-position key; this relies on the rows of `Y` being in the
  # same order as the rows that were passed to Rtsne().
  mutate(ID = row_number()) %>%
  inner_join(unnested_words_with_id, by = "ID") %>%
  # The key is only needed for the join.
  select(-ID)
```
```{r}
# Fixed seed so the same subset of words is labelled on every run.
set.seed(42)
# Plot a random subset of at most 500 words as text labels at their t-SNE
# coordinates. The size is capped at nrow(tSNE_df) because sample_n() raises
# an error when asked for more rows than the data contains.
tSNE_df %>%
  sample_n(min(500L, nrow(tSNE_df))) %>%
  ggplot(aes(x = tSNE1, y = tSNE2, label = word)) +
  # `label` is inherited from the ggplot() mapping above.
  geom_text(hjust = 0, vjust = 0, color = "blue")
```