Assignment 4: Text Analysis Using Quanteda

US Presidential Inaugural Speeches

library(quanteda)
Warning: package 'quanteda' was built under R version 4.5.3
Package version: 4.3.1
Unicode version: 15.1
ICU version: 74.1
Parallel computing: 12 of 12 threads used.
See https://quanteda.io for tutorials and examples.
library(quanteda.textmodels)
Warning: package 'quanteda.textmodels' was built under R version 4.5.3
library(quanteda.textplots)
Warning: package 'quanteda.textplots' was built under R version 4.5.3
library(readr)
Warning: package 'readr' was built under R version 4.5.3
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.5.3
# Wordcloud
# based on US presidential inaugural address texts, and metadata (for the corpus), from 1789 to present.
dfm_inaug <- corpus_subset(data_corpus_inaugural, Year <= 1826) %>% 
  tokens(remove_punct = TRUE) %>% 
  tokens_remove(stopwords('english')) %>% 
  dfm() %>%
  dfm_trim(min_termfreq = 10, verbose = FALSE)

set.seed(100)
textplot_wordcloud(dfm_inaug)

inaug_speech = data_corpus_inaugural

corpus_subset(data_corpus_inaugural, 
              President %in% c("Eisenhower", "Johnson", "Nixon")) %>%
  tokens(remove_punct = TRUE) %>%
  tokens_remove(stopwords("english")) %>%
  dfm() %>%
  dfm_group(groups = President) %>%
  dfm_trim(min_termfreq = 5, verbose = FALSE) %>%
  textplot_wordcloud(comparison = TRUE)
Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
pledge could not be fit on page. It will not be plotted.
Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
mankind could not be fit on page. It will not be plotted.
Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
seeking could not be fit on page. It will not be plotted.
Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
nothing could not be fit on page. It will not be plotted.
Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
wisdom could not be fit on page. It will not be plotted.
Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
citizens could not be fit on page. It will not be plotted.

textplot_wordcloud(dfm_inaug, min_count = 10,
                   color = c('firebrick', 'pink', 'green', 'purple', 'orange', 'blue'))

data_corpus_inaugural_subset <- 
  corpus_subset(data_corpus_inaugural, Year > 1949)
kwic(tokens(data_corpus_inaugural_subset), pattern = "american") %>%
  textplot_xray()

textplot_xray(
  kwic(tokens(data_corpus_inaugural_subset), pattern = "America"),
  kwic(tokens(data_corpus_inaugural_subset), pattern = "Freedom"),
  kwic(tokens(data_corpus_inaugural_subset), pattern = "Safety")
  
)

theme_set(theme_bw())
g <- textplot_xray(
  kwic(tokens(data_corpus_inaugural_subset), pattern = "american"),
  kwic(tokens(data_corpus_inaugural_subset), pattern = "people"),
  kwic(tokens(data_corpus_inaugural_subset), pattern = "capitalist")
)
g + aes(color = keyword) + 
  scale_color_manual(values = c("blue", "red", "green")) +
  theme(legend.position = "none")

library(quanteda.textstats)
Warning: package 'quanteda.textstats' was built under R version 4.5.3
features_dfm_inaug <- textstat_frequency(dfm_inaug, n = 100)

# Sort by reverse frequency order
features_dfm_inaug$feature <- with(features_dfm_inaug, reorder(feature, -frequency))

ggplot(features_dfm_inaug, aes(x = feature, y = frequency)) +
  geom_point() + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Get frequency grouped by president
freq_grouped <- textstat_frequency(dfm(tokens(data_corpus_inaugural_subset)), 
                                   groups = data_corpus_inaugural_subset$President)

# Filter the term "american"
freq_american <- subset(freq_grouped, freq_grouped$feature %in% "american")  

ggplot(freq_american, aes(x = group, y = frequency)) +
  geom_point() + 
  scale_y_continuous(limits = c(0, 14), breaks = c(seq(0, 14, 2))) +
  xlab(NULL) + 
  ylab("Frequency") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

dfm_rel_freq <- dfm_weight(dfm(tokens(data_corpus_inaugural_subset)), scheme = "prop") * 100
head(dfm_rel_freq)
Document-feature matrix of: 6 documents, 4,625 features (86.44% sparse) and 4 docvars.
                 features
docs                      my    friends        ,    before          i
  1953-Eisenhower 0.14582574 0.14582574 4.593511 0.1822822 0.10936930
  1957-Eisenhower 0.20975354 0.10487677 6.345045 0.1573152 0.05243838
  1961-Kennedy    0.19467878 0.06489293 5.451006 0.1297859 0.32446463
  1965-Johnson    0.17543860 0.05847953 5.555556 0.2339181 0.87719298
  1969-Nixon      0.28973510 0          5.546358 0.1241722 0.86920530
  1973-Nixon      0.05012531 0.05012531 4.812030 0.2005013 0.60150376
                 features
docs                   begin      the expression       of     those
  1953-Eisenhower 0.03645643 6.234050 0.03645643 5.176814 0.1458257
  1957-Eisenhower 0          5.977976 0          5.034085 0.1573152
  1961-Kennedy    0.19467878 5.580792 0          4.218040 0.4542505
  1965-Johnson    0          4.502924 0          3.333333 0.1754386
  1969-Nixon      0          5.629139 0          3.890728 0.4552980
  1973-Nixon      0          4.160401 0          3.408521 0.3007519
[ reached max_nfeat ... 4,615 more features ]
rel_freq <- textstat_frequency(dfm_rel_freq, groups = dfm_rel_freq$President)

# Filter the term "american"
rel_freq_american <- subset(rel_freq, feature %in% "american")  

ggplot(rel_freq_american, aes(x = group, y = frequency)) +
  geom_point() + 
  scale_y_continuous(limits = c(0, 0.7), breaks = c(seq(0, 0.7, 0.1))) +
  xlab(NULL) + 
  ylab("Relative frequency") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

dfm_weight_pres <- data_corpus_inaugural %>%
  corpus_subset(Year > 2000) %>%
  tokens(remove_punct = TRUE) %>%
  tokens_remove(stopwords("english")) %>%
  dfm() %>%
  dfm_weight(scheme = "prop")

# Calculate relative frequency by president
freq_weight <- textstat_frequency(dfm_weight_pres, n = 15, 
                                  groups = dfm_weight_pres$President)

ggplot(data = freq_weight, aes(x = nrow(freq_weight):1, y = frequency)) +
  geom_point() +
  facet_wrap(~ group, scales = "free") +
  coord_flip() +
  scale_x_continuous(breaks = nrow(freq_weight):1,
                     labels = freq_weight$feature) +
  labs(x = NULL, y = "Relative frequency")

# Only select speeches by Kennedy and Reagan
pres_corpus <- corpus_subset(data_corpus_inaugural, 
                             President %in% c("Obama", "Bush"))

# Create a dfm grouped by president
pres_dfm <- tokens(pres_corpus, remove_punct = TRUE) %>%
  tokens_remove(stopwords("english")) %>%
  tokens_group(groups = President) %>%
  dfm()

# Calculate keyness and determine Reagan as target group
result_keyness <- textstat_keyness(pres_dfm, target = "Bush")

# Plot estimated word keyness
textplot_keyness(result_keyness) 

# Plot without the reference text (in this case Obama)
textplot_keyness(result_keyness, show_reference = FALSE)

The output shows both similarities and differences over time among presidents in inaugural speeches over time across different presidents. An important similarity is how the presidents used repeated national and civic themes, especially terms such as America, freedom, and safety. The comparison of Eisenhower, Johnson, and Nixon shows that presidents use different languages to define national priorities. Additionally, the Bush-Obama comparison demonstrates that presidents can share broad themes, but use different rhetorical patterns and points of emphasis. Overall, these results shows that there is continuity in ceremonial purpose, but language, tone, and policy framing vary.

Wordfish

Wordfish as a Poisson scaling model that estimates one-dimension document positions that utilizes maximum likelihoods, and both the estimated position of words and the documents can be plotted.

### EPPS 6323 Workshop: Text Analytics- US DOD China Military Power Reports (1999–2025)
### Tools: quanteda, quanteda.textmodels, quanteda.textstats, stm
### Author: Karl Ho, University of Texas at Dallas

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.5.3
Warning: package 'tidyr' was built under R version 4.5.3
Warning: package 'dplyr' was built under R version 4.5.3
Warning: package 'lubridate' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.1     ✔ stringr   1.5.1
✔ forcats   1.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pdftools)
Warning: package 'pdftools' was built under R version 4.5.3
Using poppler version 26.01.0
library(quanteda)
library(quanteda.textstats)
library(quanteda.textplots)
library(quanteda.textmodels)
library(topicmodels)
Warning: package 'topicmodels' was built under R version 4.5.3
library(scales)
Warning: package 'scales' was built under R version 4.5.3

Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor
library(tictoc)
Warning: package 'tictoc' was built under R version 4.5.3
# Configuration: Set path to your PDF directory
# IMPORTANT: use forward slashes on Windows
PATH_DATA <- "C:/akbarikasra9.github.io"   # change this to your actual folder containing the PDFs

# Seed for reproducibility
SEED <- 6323


## 1. DATA INGESTION

# Check folder exists
if (!dir.exists(PATH_DATA)) {
  stop("PATH_DATA does not exist. Check your folder path.")
}

# List all PDF files (recursive in case PDFs are inside subfolders)
pdf_files <- list.files(
  PATH_DATA,
  pattern = "\\.pdf$",
  full.names = TRUE,
  ignore.case = TRUE,
  recursive = TRUE
)

cat("Found", length(pdf_files), "PDF files\n")
Found 51 PDF files
print(pdf_files)
 [1] "C:/akbarikasra9.github.io/akbarikasra9.github.io/files/ai-lab.pdf"                                                                                  
 [2] "C:/akbarikasra9.github.io/akbarikasra9.github.io/files/ai lab.pdf"                                                                                  
 [3] "C:/akbarikasra9.github.io/datamethodsgovfiles_1.pdf"                                                                                                
 [4] "C:/akbarikasra9.github.io/datamethodsgovfiles_10.pdf"                                                                                               
 [5] "C:/akbarikasra9.github.io/datamethodsgovfiles_11.pdf"                                                                                               
 [6] "C:/akbarikasra9.github.io/datamethodsgovfiles_12.pdf"                                                                                               
 [7] "C:/akbarikasra9.github.io/datamethodsgovfiles_13.pdf"                                                                                               
 [8] "C:/akbarikasra9.github.io/datamethodsgovfiles_14.pdf"                                                                                               
 [9] "C:/akbarikasra9.github.io/datamethodsgovfiles_15.pdf"                                                                                               
[10] "C:/akbarikasra9.github.io/datamethodsgovfiles_16.pdf"                                                                                               
[11] "C:/akbarikasra9.github.io/datamethodsgovfiles_17.pdf"                                                                                               
[12] "C:/akbarikasra9.github.io/datamethodsgovfiles_18.pdf"                                                                                               
[13] "C:/akbarikasra9.github.io/datamethodsgovfiles_19.pdf"                                                                                               
[14] "C:/akbarikasra9.github.io/datamethodsgovfiles_2.pdf"                                                                                                
[15] "C:/akbarikasra9.github.io/datamethodsgovfiles_20.pdf"                                                                                               
[16] "C:/akbarikasra9.github.io/datamethodsgovfiles_4.pdf"                                                                                                
[17] "C:/akbarikasra9.github.io/datamethodsgovfiles_5.pdf"                                                                                                
[18] "C:/akbarikasra9.github.io/datamethodsgovfiles_6.pdf"                                                                                                
[19] "C:/akbarikasra9.github.io/datamethodsgovfiles_7.pdf"                                                                                                
[20] "C:/akbarikasra9.github.io/datamethodsgovfiles_8.pdf"                                                                                                
[21] "C:/akbarikasra9.github.io/datamethodsgovfiles_9.pdf"                                                                                                
[22] "C:/akbarikasra9.github.io/Figure01.pdf"                                                                                                             
[23] "C:/akbarikasra9.github.io/files/ai-lab.pdf"                                                                                                         
[24] "C:/akbarikasra9.github.io/images/datamethodsgovfiles_1.pdf"                                                                                         
[25] "C:/akbarikasra9.github.io/USDOD/USDOD/1999_GPO-CRPT.pdf"                                                                                            
[26] "C:/akbarikasra9.github.io/USDOD/USDOD/2000_asia_neac_dod_china.pdf"                                                                                 
[27] "C:/akbarikasra9.github.io/USDOD/USDOD/2001_DoD_AR.pdf"                                                                                              
[28] "C:/akbarikasra9.github.io/USDOD/USDOD/2002_0712china.pdf"                                                                                           
[29] "C:/akbarikasra9.github.io/USDOD/USDOD/2003_chinaex.pdf"                                                                                             
[30] "C:/akbarikasra9.github.io/USDOD/USDOD/2004_0528PRC.pdf"                                                                                             
[31] "C:/akbarikasra9.github.io/USDOD/USDOD/2005_0719china.pdf"                                                                                           
[32] "C:/akbarikasra9.github.io/USDOD/USDOD/2006_dod.pdf"                                                                                                 
[33] "C:/akbarikasra9.github.io/USDOD/USDOD/2007_dod.pdf"                                                                                                 
[34] "C:/akbarikasra9.github.io/USDOD/USDOD/2008_dod.pdf"                                                                                                 
[35] "C:/akbarikasra9.github.io/USDOD/USDOD/2009_China_Military_Power_Report.pdf"                                                                         
[36] "C:/akbarikasra9.github.io/USDOD/USDOD/2010_CMPR_Final.pdf"                                                                                          
[37] "C:/akbarikasra9.github.io/USDOD/USDOD/2011_CMPR_Final.pdf"                                                                                          
[38] "C:/akbarikasra9.github.io/USDOD/USDOD/2012_CMPR_Final.pdf"                                                                                          
[39] "C:/akbarikasra9.github.io/USDOD/USDOD/2013_China_Report_FINAL.pdf"                                                                                  
[40] "C:/akbarikasra9.github.io/USDOD/USDOD/2014_DoD_China_Report.pdf"                                                                                    
[41] "C:/akbarikasra9.github.io/USDOD/USDOD/2015_China_Military_Power_Report.pdf"                                                                         
[42] "C:/akbarikasra9.github.io/USDOD/USDOD/2016 China Military Power Report.pdf"                                                                         
[43] "C:/akbarikasra9.github.io/USDOD/USDOD/2017_China_Military_Power_Report.pdf"                                                                         
[44] "C:/akbarikasra9.github.io/USDOD/USDOD/2018_CHINA-MILITARY-POWER-REPORT.pdf"                                                                         
[45] "C:/akbarikasra9.github.io/USDOD/USDOD/2019_CHINA_MILITARY_POWER_REPORT.pdf"                                                                         
[46] "C:/akbarikasra9.github.io/USDOD/USDOD/2020_DOD-CHINA-MILITARY-POWER-REPORT-FINAL.pdf"                                                               
[47] "C:/akbarikasra9.github.io/USDOD/USDOD/2021-cmpr-final.pdf"                                                                                          
[48] "C:/akbarikasra9.github.io/USDOD/USDOD/2022-military-and-security-developments-involving-the-peoples-republic-of-china.pdf"                          
[49] "C:/akbarikasra9.github.io/USDOD/USDOD/2023-MILITARY-AND-SECURITY-DEVELOPMENTS-PRC.PDF"                                                              
[50] "C:/akbarikasra9.github.io/USDOD/USDOD/2024_MILITARY-AND-SECURITY-DEVELOPMENTS-PRC.PDF"                                                              
[51] "C:/akbarikasra9.github.io/USDOD/USDOD/2025-ANNUAL-REPORT-TO-CONGRESS-MILITARY-AND-SECURITY-DEVELOPMENTS-INVOLVING-THE-PEOPLES-REPUBLIC-OF-CHINA.PDF"
if (length(pdf_files) == 0) {
  stop("No PDF files found. Check PATH_DATA and make sure the folder actually contains PDFs.")
}

# Read PDFs safely
read_pdf_safe <- function(filepath) {
  tryCatch({
    pages <- suppressWarnings(pdftools::pdf_text(filepath))
    text <- paste(pages, collapse = "\n")

    if (is.null(text) || length(text) == 0 || nchar(trimws(text)) == 0) {
      warning("No usable text extracted from: ", basename(filepath))
      return(NA_character_)
    }

    return(text)
  }, error = function(e) {
    warning("Failed to read: ", basename(filepath), " — ", e$message)
    return(NA_character_)
  })
}

DODq <- tibble(
  doc_id = basename(pdf_files),
  text   = purrr::map_chr(pdf_files, read_pdf_safe)
)
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Could not parse ligature component "10" of "_10" in parseCharName
PDF error: Could not parse ligature component "11" of "_11" in parseCharName
PDF error: Could not parse ligature component "12" of "_12" in parseCharName
PDF error: Could not parse ligature component "13" of "_13" in parseCharName
PDF error: Could not parse ligature component "14" of "_14" in parseCharName
PDF error: Could not parse ligature component "15" of "_15" in parseCharName
PDF error: Could not parse ligature component "16" of "_16" in parseCharName
PDF error: Could not parse ligature component "17" of "_17" in parseCharName
PDF error: Could not parse ligature component "18" of "_18" in parseCharName
PDF error: Could not parse ligature component "19" of "_19" in parseCharName
PDF error: Could not parse ligature component "0020" of "_0020" in parseCharName
PDF error: Could not parse ligature component "0021" of "_0021" in parseCharName
PDF error: Could not parse ligature component "0022" of "_0022" in parseCharName
PDF error: Could not parse ligature component "0023" of "_0023" in parseCharName
PDF error: Could not parse ligature component "0024" of "_0024" in parseCharName
PDF error: Could not parse ligature component "0025" of "_0025" in parseCharName
PDF error: Could not parse ligature component "0026" of "_0026" in parseCharName
PDF error: Could not parse ligature component "0027" of "_0027" in parseCharName
PDF error: Could not parse ligature component "28" of "_28" in parseCharName
PDF error: Could not parse ligature component "0029" of "_0029" in parseCharName
PDF error: Could not parse ligature component "0030" of "_0030" in parseCharName
PDF error: Could not parse ligature component "39" of "_39" in parseCharName
PDF error: Could not parse ligature component "0042" of "_0042" in parseCharName
PDF error: Could not parse ligature component "52" of "_52" in parseCharName
PDF error: Could not parse ligature component "57" of "_57" in parseCharName
PDF error: Could not parse ligature component "0061" of "_0061" in parseCharName
PDF error: Could not parse ligature component "70" of "_70" in parseCharName
PDF error: Could not parse ligature component "71" of "_71" in parseCharName
PDF error: Could not parse ligature component "72" of "_72" in parseCharName
PDF error: Could not parse ligature component "73" of "_73" in parseCharName
PDF error: Could not parse ligature component "74" of "_74" in parseCharName
PDF error: Could not parse ligature component "0075" of "_0075" in parseCharName
PDF error: Could not parse ligature component "76" of "_76" in parseCharName
PDF error: Could not parse ligature component "0077" of "_0077" in parseCharName
PDF error: Could not parse ligature component "0078" of "_0078" in parseCharName
PDF error: Could not parse ligature component "0079" of "_0079" in parseCharName
PDF error: Could not parse ligature component "0080" of "_0080" in parseCharName
PDF error: Could not parse ligature component "0081" of "_0081" in parseCharName
PDF error: Could not parse ligature component "0082" of "_0082" in parseCharName
PDF error: Could not parse ligature component "84" of "_84" in parseCharName
PDF error: Could not parse ligature component "85" of "_85" in parseCharName
PDF error: Could not parse ligature component "113" of "_113" in parseCharName
PDF error: Could not parse ligature component "114" of "_114" in parseCharName
PDF error: Could not parse ligature component "115" of "_115" in parseCharName
PDF error: Could not parse ligature component "116" of "_116" in parseCharName
PDF error: Could not parse ligature component "123" of "_123" in parseCharName
PDF error: Could not parse ligature component "127" of "_127" in parseCharName
PDF error: Could not parse ligature component "128" of "_128" in parseCharName
PDF error: Could not parse ligature component "129" of "_129" in parseCharName
PDF error: Could not parse ligature component "130" of "_130" in parseCharName
PDF error: Could not parse ligature component "131" of "_131" in parseCharName
PDF error: Could not parse ligature component "133" of "_133" in parseCharName
PDF error: Could not parse ligature component "141" of "_141" in parseCharName
PDF error: Could not parse ligature component "143" of "_143" in parseCharName
PDF error: Could not parse ligature component "144" of "_144" in parseCharName
PDF error: Could not parse ligature component "145" of "_145" in parseCharName
PDF error: Could not parse ligature component "161" of "_161" in parseCharName
PDF error: Could not parse ligature component "169" of "_169" in parseCharName
PDF error: Could not parse ligature component "170" of "_170" in parseCharName
PDF error: Could not parse ligature component "171" of "_171" in parseCharName
PDF error: Could not parse ligature component "172" of "_172" in parseCharName
PDF error: Could not parse ligature component "173" of "_173" in parseCharName
PDF error: Could not parse ligature component "174" of "_174" in parseCharName
PDF error: Could not parse ligature component "175" of "_175" in parseCharName
PDF error: Could not parse ligature component "181" of "_181" in parseCharName
PDF error: Could not parse ligature component "182" of "_182" in parseCharName
PDF error: Could not parse ligature component "183" of "_183" in parseCharName
PDF error: Could not parse ligature component "184" of "_184" in parseCharName
PDF error: Could not parse ligature component "185" of "_185" in parseCharName
PDF error: Could not parse ligature component "186" of "_186" in parseCharName
PDF error: Could not parse ligature component "189" of "_189" in parseCharName
PDF error: Could not parse ligature component "190" of "_190" in parseCharName
PDF error: Could not parse ligature component "191" of "_191" in parseCharName
PDF error: Could not parse ligature component "192" of "_192" in parseCharName
PDF error: Could not parse ligature component "193" of "_193" in parseCharName
PDF error: Could not parse ligature component "194" of "_194" in parseCharName
PDF error: Could not parse ligature component "195" of "_195" in parseCharName
PDF error: Could not parse ligature component "196" of "_196" in parseCharName
PDF error: Could not parse ligature component "197" of "_197" in parseCharName
PDF error: Could not parse ligature component "198" of "_198" in parseCharName
PDF error: Could not parse ligature component "199" of "_199" in parseCharName
PDF error: Could not parse ligature component "214" of "_214" in parseCharName
PDF error: Could not parse ligature component "215" of "_215" in parseCharName
PDF error: Could not parse ligature component "216" of "_216" in parseCharName
PDF error: Could not parse ligature component "0217" of "_0217" in parseCharName
PDF error: Could not parse ligature component "218" of "_218" in parseCharName
PDF error: Could not parse ligature component "219" of "_219" in parseCharName
PDF error: Could not parse ligature component "220" of "_220" in parseCharName
PDF error: Could not parse ligature component "221" of "_221" in parseCharName
PDF error: Could not parse ligature component "222" of "_222" in parseCharName
PDF error: Could not parse ligature component "223" of "_223" in parseCharName
PDF error: Could not parse ligature component "224" of "_224" in parseCharName
PDF error: Could not parse ligature component "0225" of "_0225" in parseCharName
PDF error: Could not parse ligature component "226" of "_226" in parseCharName
PDF error: Could not parse ligature component "227" of "_227" in parseCharName
PDF error: Could not parse ligature component "228" of "_228" in parseCharName
PDF error: Could not parse ligature component "229" of "_229" in parseCharName
PDF error: Could not parse ligature component "230" of "_230" in parseCharName
PDF error: Could not parse ligature component "231" of "_231" in parseCharName
PDF error: Could not parse ligature component "232" of "_232" in parseCharName
PDF error: Could not parse ligature component "233" of "_233" in parseCharName
PDF error: Could not parse ligature component "234" of "_234" in parseCharName
PDF error: Could not parse ligature component "235" of "_235" in parseCharName
PDF error: Could not parse ligature component "236" of "_236" in parseCharName
PDF error: Could not parse ligature component "237" of "_237" in parseCharName
PDF error: Could not parse ligature component "238" of "_238" in parseCharName
PDF error: Could not parse ligature component "239" of "_239" in parseCharName
PDF error: Could not parse ligature component "240" of "_240" in parseCharName
PDF error: Could not parse ligature component "241" of "_241" in parseCharName
PDF error: Could not parse ligature component "242" of "_242" in parseCharName
PDF error: Could not parse ligature component "243" of "_243" in parseCharName
PDF error: Could not parse ligature component "244" of "_244" in parseCharName
PDF error: Could not parse ligature component "245" of "_245" in parseCharName
PDF error: Could not parse ligature component "247" of "_247" in parseCharName
PDF error: Could not parse ligature component "248" of "_248" in parseCharName
PDF error: Could not parse ligature component "249" of "_249" in parseCharName
PDF error: Could not parse ligature component "250" of "_250" in parseCharName
PDF error: Could not parse ligature component "251" of "_251" in parseCharName
PDF error: Could not parse ligature component "252" of "_252" in parseCharName
PDF error: Could not parse ligature component "253" of "_253" in parseCharName
PDF error: Could not parse ligature component "254" of "_254" in parseCharName
PDF error: Could not parse ligature component "255" of "_255" in parseCharName
PDF error: Expected the default config, but wasn't able to find it, or it isn't a Dictionary
# Report failed or empty files
n_failed <- sum(is.na(DODq$text))
if (n_failed > 0) {
  cat("WARNING:", n_failed, "file(s) failed to read or were empty:\n")
  print(DODq$doc_id[is.na(DODq$text)])
  DODq <- dplyr::filter(DODq, !is.na(text))
}

if (nrow(DODq) == 0) {
  stop("All files failed to load, so the corpus is empty.")
}

# Extract year from filenames (first 4-digit sequence)
DODq$year <- as.numeric(stringr::str_extract(DODq$doc_id, "\\d{4}"))

# Drop files where year could not be extracted
if (any(is.na(DODq$year))) {
  cat("Dropping files with no 4-digit year in filename:\n")
  print(DODq$doc_id[is.na(DODq$year)])
  DODq <- dplyr::filter(DODq, !is.na(year))
}
Dropping files with no 4-digit year in filename:
 [1] "ai-lab.pdf"                 "ai lab.pdf"                
 [3] "datamethodsgovfiles_1.pdf"  "datamethodsgovfiles_10.pdf"
 [5] "datamethodsgovfiles_11.pdf" "datamethodsgovfiles_12.pdf"
 [7] "datamethodsgovfiles_13.pdf" "datamethodsgovfiles_14.pdf"
 [9] "datamethodsgovfiles_15.pdf" "datamethodsgovfiles_16.pdf"
[11] "datamethodsgovfiles_17.pdf" "datamethodsgovfiles_18.pdf"
[13] "datamethodsgovfiles_19.pdf" "datamethodsgovfiles_2.pdf" 
[15] "datamethodsgovfiles_20.pdf" "datamethodsgovfiles_4.pdf" 
[17] "datamethodsgovfiles_5.pdf"  "datamethodsgovfiles_6.pdf" 
[19] "datamethodsgovfiles_7.pdf"  "datamethodsgovfiles_8.pdf" 
[21] "datamethodsgovfiles_9.pdf"  "Figure01.pdf"              
[23] "ai-lab.pdf"                 "datamethodsgovfiles_1.pdf" 
if (nrow(DODq) == 0) {
  stop("No valid files remain after dropping files without extractable years.")
}

# Create clean UNIQUE document IDs
DODq$doc_id <- make.unique(paste0("USDOD_", DODq$year, ".pdf"))

# Sort by year for consistent ordering
DODq <- dplyr::arrange(DODq, year)

cat("Successfully loaded", nrow(DODq), "reports spanning",
    min(DODq$year), "to", max(DODq$year), "\n")
Successfully loaded 27 reports spanning 1999 to 2025 
print(DODq[, c("doc_id", "year")])
# A tibble: 27 × 2
   doc_id          year
   <chr>          <dbl>
 1 USDOD_1999.pdf  1999
 2 USDOD_2000.pdf  2000
 3 USDOD_2001.pdf  2001
 4 USDOD_2002.pdf  2002
 5 USDOD_2003.pdf  2003
 6 USDOD_2004.pdf  2004
 7 USDOD_2005.pdf  2005
 8 USDOD_2006.pdf  2006
 9 USDOD_2007.pdf  2007
10 USDOD_2008.pdf  2008
# ℹ 17 more rows
## 2. CORPUS CONSTRUCTION WITH METADATA

corp_DOD <- corpus(DODq, text_field = "text")

# Assign president by year (updated for second Trump term)
docvars(corp_DOD, "president") <- dplyr::case_when(
  docvars(corp_DOD, "year") <= 2000                          ~ "Clinton",
  docvars(corp_DOD, "year") >= 2001 & docvars(corp_DOD, "year") <= 2008  ~ "Bush",
  docvars(corp_DOD, "year") >= 2009 & docvars(corp_DOD, "year") <= 2016  ~ "Obama",
  docvars(corp_DOD, "year") >= 2017 & docvars(corp_DOD, "year") <= 2020  ~ "Trump",
  docvars(corp_DOD, "year") >= 2021 & docvars(corp_DOD, "year") <= 2024  ~ "Biden",
  docvars(corp_DOD, "year") >= 2025                          ~ "Trump II",
  TRUE                                                       ~ NA_character_
)

# Verify metadata
summary(corp_DOD, 5)
Corpus consisting of 27 documents, showing 5 documents:

           Text Types Tokens Sentences year president
 USDOD_1999.pdf 16068 301499     10250 1999   Clinton
 USDOD_2000.pdf  2990  15078       540 2000   Clinton
 USDOD_2001.pdf 14864 186149      6246 2001      Bush
 USDOD_2002.pdf  4178  25908      1051 2002      Bush
 USDOD_2003.pdf  4068  25848       957 2003      Bush
table(docvars(corp_DOD, "president"))

   Biden     Bush  Clinton    Obama    Trump Trump II 
       4        8        2        8        4        1 
## 3. TOKENIZATION AND DFM

# Tokenize with preprocessing
toks_DOD <- tokens(corp_DOD, remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(stopwords("english")) %>%
  tokens_remove(c("page", "figure", "table", "appendix", "chapter",
                  "https", "www", "pdf", "gov"))  # Remove boilerplate

# Create DFM
dfmat_DOD <- dfm(toks_DOD)

cat("DFM dimensions:", dim(dfmat_DOD), "\n")
DFM dimensions: 27 27464 
cat("Documents:", ndoc(dfmat_DOD), "| Features:", nfeat(dfmat_DOD), "\n")
Documents: 27 | Features: 27464 
## 4. EXPLORATORY TEXT ANALYSIS

# --- 4a. Top features ---

topfeat <- topfeatures(dfmat_DOD, 30)
data.frame(feature = names(topfeat), freq = topfeat) %>%
  ggplot(aes(x = reorder(feature, freq), y = freq)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Top 30 Features across All DOD China Reports",
       x = NULL, y = "Frequency") +
  theme_bw(base_size = 18) +
  theme(text = element_text(family = "Palatino"))
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database

# --- 4b. Track key terms over time ---

# Create a year-grouped DFM for trend analysis
dfmat_year <- dfm_group(dfmat_DOD, groups = docvars(corp_DOD, "year"))

# Track specific terms
key_terms <- c("taiwan", "pla", "nuclear", "cyber", "missile",
               "threat", "aircraft", "carrier", "space", "ai")

term_trends <- convert(dfmat_year, to = "data.frame") %>%
  dplyr::select(doc_id, any_of(key_terms)) %>%
  mutate(year = as.numeric(stringr::str_extract(doc_id, "\\d{4}"))) %>%
  pivot_longer(-c(doc_id, year), names_to = "term", values_to = "count")

# Normalize by document length
doc_lengths <- ntoken(dfmat_year)
term_trends <- term_trends %>%
  left_join(tibble(doc_id = names(doc_lengths), total = doc_lengths), by = "doc_id") %>%
  mutate(rate = count / total * 10000)  # Rate per 10,000 words

ggplot(term_trends, aes(x = year, y = rate, color = term)) +
  geom_line(linewidth = 0.8) +
  geom_point(size = 1.5) +
  facet_wrap(~term, scales = "free_y", ncol = 2) +
  labs(title = "Key Term Frequency in DOD China Reports (per 10,000 words)",
       x = "Year", y = "Rate per 10,000 words") +
  theme_bw(base_size = 14) +
  theme(legend.position = "none", text = element_text(family = "Palatino"))
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database

# --- 4c. Keyness: Trump vs. Obama/Bush ---

tstat_key <- textstat_keyness(
  dfmat_DOD,
  target = docvars(corp_DOD, "president") %in% c("Trump", "Trump II"),
  measure = "lr"
)

textplot_keyness(tstat_key, n = 15,
                 color = c("steelblue", "slategray"), font = "Palatino") +
  labs(title = "Keyness: Trump-era vs. Other Administrations",
       subtitle = "Log-likelihood ratio (LLR)") +
  theme_bw(base_size = 16) +
  theme(legend.position = "bottom", text = element_text(family = "Palatino"))
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database

# --- 4d. Collocations (bigrams and trigrams) ---

tstat_col2 <- textstat_collocations(toks_DOD, size = 2, min_count = 50)
head(tstat_col2, 20)
              collocation count count_nested length    lambda         z
1       military security  2043            0      2  4.128573 136.07717
2         report congress  1841            0      2  8.113547 130.40135
3           united states  1868            0      2  9.438579 117.85319
4           annual report  1842            0      2  8.581998 117.79561
5   security developments  2007            0      2  7.529940 112.39060
6       congress military  1784            0      2  5.893082 111.29157
7      involving people’s  1996            0      2  9.568945 102.95521
8          DEFENSE annual  1493            0      2  9.452197 102.00598
9  developments involving  2008            0      2 10.735301  99.30482
10      ballistic missile   588            0      2  6.117265  99.15125
11     ballistic missiles   517            0      2  6.335754  97.11052
12              air force   724            0      2  4.535028  96.96035
13             long march   489            0      2  7.918043  92.97652
14      national security   776            0      2  3.982422  91.60292
15        theater command   444            0      2  6.403907  90.78121
16       review committee   420            0      2  6.658766  89.56076
17      people’s republic  2351            0      2 10.957725  87.67401
18       national defense   698            0      2  3.731231  83.45899
19     independent review   466            0      2  8.626357  83.12520
20                    o o   318            0      2  8.264850  79.46064
tstat_col3 <- textstat_collocations(toks_DOD, size = 3, min_count = 30)
head(tstat_col3, 15)
                          collocation count count_nested length    lambda
1         defense technology security   128            0      3  5.491560
2      military security developments  2004            0      3  9.600290
3                 forces china taiwan    54            0      3  3.705999
4      security development interests    95            0      3  5.560053
5                  use military force    37            0      3  5.567996
6          national people’s congress    59            0      3  8.870559
7          security strategy military    34            0      3  3.705936
8             strategic support force    58            0      3  3.484399
9  technology security administration   116            0      3  6.864390
10    security developments involving  2003            0      3  8.931163
11            people’s republic china  2338            0      3  7.521485
12      military strategic guidelines    75            0      3  7.708746
13               u.s national weapons    38            0      3  2.779324
14                air defense systems    77            0      3  2.681339
15           report congress military  1784            0      3 11.678788
           z
1  17.745103
2  14.582708
3  12.975573
4  12.311551
5  11.575311
6  11.080896
7  10.605510
8   9.759830
9   9.671924
10  8.582845
11  8.320040
12  8.072079
13  7.979257
14  7.954097
15  7.505240
## 5. WORDFISH SCALING

# Trim DFM for Wordfish (remove very rare and very common features)
dfmat_wf <- dfm_trim(dfmat_DOD, min_termfreq = 10, max_docfreq = 0.95,
                     docfreq_type = "prop")

# Keep president docvar attached to the DFM
docvars(dfmat_wf, "president") <- docvars(corp_DOD, "president")[match(docnames(dfmat_wf), docnames(corp_DOD))]

# Identify anchor documents by name (not position)
# Use an early report and a recent report to set scale direction
anchor_early <- which(docnames(dfmat_wf) == "USDOD_2000.pdf")
anchor_late  <- which(docnames(dfmat_wf) == "USDOD_2023.pdf")

if (length(anchor_early) == 0 || length(anchor_late) == 0) {
  stop("Anchor documents not found. Check that USDOD_2000.pdf and USDOD_2023.pdf exist in dfmat_wf.")
}

cat("Anchor documents:",
    docnames(dfmat_wf)[anchor_early], "(early) and",
    docnames(dfmat_wf)[anchor_late], "(late)\n")
Anchor documents: USDOD_2000.pdf (early) and USDOD_2023.pdf (late)
# Fit Wordfish
tmod_wf <- textmodel_wordfish(dfmat_wf, dir = c(anchor_early, anchor_late))
summary(tmod_wf)

Call:
textmodel_wordfish.dfm(x = dfmat_wf, dir = c(anchor_early, anchor_late))

Estimated Document Positions:
                   theta       se
USDOD_1999.pdf -4.207588 0.004153
USDOD_2000.pdf -0.390048 0.044274
USDOD_2001.pdf -1.620679 0.012955
USDOD_2002.pdf -0.592040 0.034528
USDOD_2003.pdf -0.419041 0.033182
USDOD_2004.pdf -0.239365 0.032227
USDOD_2005.pdf -0.258012 0.036257
USDOD_2006.pdf -0.154419 0.032662
USDOD_2007.pdf -0.124346 0.034982
USDOD_2008.pdf  0.005887 0.025718
USDOD_2009.pdf  0.029329 0.023079
USDOD_2010.pdf  0.054798 0.022772
USDOD_2011.pdf  0.118326 0.019899
USDOD_2012.pdf  0.124458 0.032058
USDOD_2013.pdf  0.310564 0.017454
USDOD_2014.pdf  0.366308 0.015956
USDOD_2015.pdf  0.414408 0.013758
USDOD_2016.pdf  0.486486 0.011620
USDOD_2017.pdf  0.583284 0.009979
USDOD_2018.pdf  0.638408 0.007349
USDOD_2019.pdf  0.657588 0.006869
USDOD_2020.pdf  0.695446 0.004273
USDOD_2021.pdf  0.711583 0.003524
USDOD_2022.pdf  0.703120 0.003745
USDOD_2023.pdf  0.772127 0.001359
USDOD_2024.pdf  0.721475 0.002933
USDOD_2025.pdf  0.611940 0.008589

Estimated Feature Scores:
      volume  select committee  house representatives  session submitted
beta -1.0469 -1.3581   -0.9316 -1.515          -1.253 -0.21097   -0.5357
psi  -0.2393 -0.3581    1.8192 -1.293          -0.199  0.07625   -0.4235
          mr    cox california chairman  january committed  whole ordered
beta 0.05891 -2.157    -0.5698  -0.0576 -0.04423  -0.06522 0.0869  0.5645
psi  1.16596 -7.919    -0.8807   2.2986  2.11627   0.80251 0.5139  0.2797
     subject  review amended    1st washington   note   final peoples approved
beta -0.5419 -0.9644 -0.2182 0.3005    -0.3680 0.2141 -0.4916  -1.254  -0.5488
psi   0.4656  0.7948  0.3529 0.3242     0.9478 1.5388  0.5241  -4.031   0.0689
     served   version classified    top  secret  issued
beta 0.3137 0.0003564    -0.7582 0.1922 -0.4143 -0.2331
psi  1.5902 1.3162275    -0.2214 1.8753 -0.7545  1.0297
# --- 5a. Document positions (1D) ---
textplot_scale1d(tmod_wf)

# --- 5b. Document positions by president ---
textplot_scale1d(tmod_wf, groups = docvars(dfmat_wf, "president"),
                 highlighted_color = "firebrick")

# --- 5c. Feature positions with domain-specific highlights ---
textplot_scale1d(tmod_wf, margin = "features",
                 highlighted = c("taiwan", "pla", "nuclear", "missile",
                                 "cyber", "space", "carrier", "ai"),
                 highlighted_color = "firebrick")

# --- 5d. Theta over time (custom ggplot) ---

doc_pos <- data.frame(
  document  = docnames(dfmat_wf),
  theta     = tmod_wf$theta,
  year      = as.numeric(stringr::str_extract(docnames(dfmat_wf), "\\d{4}")),
  president = docvars(dfmat_wf, "president"),
  stringsAsFactors = FALSE
) %>% arrange(year)

# Color palette for all presidents
pres_colors <- c(
  "Clinton"  = "#9467bd",
  "Bush"     = "#1f78b4",
  "Obama"    = "#33a02c",
  "Trump"    = "#e31a1c",
  "Biden"    = "#ff7f00",
  "Trump II" = "#d62728"
)

ggplot(doc_pos, aes(x = year, y = theta)) +
  geom_line(color = "gray60", linetype = "dashed") +
  geom_point(aes(color = president), size = 3) +
  geom_text(aes(label = year), vjust = -1.2, size = 3) +
  geom_smooth(method = "loess", se = TRUE, color = "gray40",
              alpha = 0.15, linewidth = 0.5) +
  scale_color_manual(values = pres_colors) +
  labs(title = "DOD China Report Positioning by Year (Wordfish)",
       subtitle = expression("Estimated latent position" ~ hat(theta) ~ "over time"),
       x = "Year", y = expression(hat(theta)),
       color = "President") +
  theme_bw(base_size = 16) +
  theme(text = element_text(family = "Palatino"),
        plot.title = element_text(hjust = 0.5, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5),
        legend.position = "bottom")
`geom_smooth()` using formula = 'y ~ x'
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database

# --- 5e. Diagnostic: Check document length ---
cat("\nDocument lengths (tokens):\n")

Document lengths (tokens):
print(sort(ntoken(dfmat_wf)))
USDOD_2000.pdf USDOD_2012.pdf USDOD_2007.pdf USDOD_2005.pdf USDOD_2006.pdf 
          2445           2559           3095           3291           3670 
USDOD_2004.pdf USDOD_2003.pdf USDOD_2002.pdf USDOD_2008.pdf USDOD_2013.pdf 
          4098           4443           4531           4844           5645 
USDOD_2010.pdf USDOD_2014.pdf USDOD_2009.pdf USDOD_2017.pdf USDOD_2015.pdf 
          5730           5746           5808           6483           6625 
USDOD_2011.pdf USDOD_2016.pdf USDOD_2025.pdf USDOD_2019.pdf USDOD_2018.pdf 
          6722           7175           7592           8867           8894 
USDOD_2020.pdf USDOD_2021.pdf USDOD_2022.pdf USDOD_2024.pdf USDOD_2023.pdf 
         15090          16822          17382          19707          20603 
USDOD_2001.pdf USDOD_1999.pdf 
         40295          80515 
## 6. CORRESPONDENCE ANALYSIS (MULTI-DIMENSIONAL)

tmod_ca <- textmodel_ca(dfmat_wf)

# 1D scale grouped by president
textplot_scale1d(tmod_ca, groups = docvars(dfmat_wf, "president"))

# 2D biplot
dat_ca <- data.frame(
  dim1      = tmod_ca$rowcoord[, 1],
  dim2      = tmod_ca$rowcoord[, 2],
  document  = rownames(tmod_ca$rowcoord),
  stringsAsFactors = FALSE
)

dat_ca$year      <- as.numeric(stringr::str_extract(dat_ca$document, "\\d{4}"))
dat_ca$president <- docvars(dfmat_wf, "president")

ggplot(dat_ca, aes(x = dim1, y = dim2, color = president)) +
  geom_point(size = 3) +
  geom_text(aes(label = year), vjust = -1, size = 3) +
  scale_color_manual(values = pres_colors) +
  labs(title = "Correspondence Analysis: DOD China Reports",
       x = "Dimension 1", y = "Dimension 2") +
  theme_minimal(base_size = 16) +
  theme(legend.position = "bottom", text = element_text(family = "Palatino"))
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database

## 7. TOPIC MODELS

# --- 7a. LDA via topicmodels ---

# Prepare: paragraph-level DFM for richer topic structure
toks_para <- tokens(corpus_reshape(corp_DOD, to = "paragraphs"),
                    remove_punct = TRUE, remove_numbers = TRUE) %>%
  tokens_remove(stopwords("english"))

dfmat_para <- dfm(toks_para) %>%
  dfm_trim(min_termfreq = 5, min_docfreq = 3)

dtm_para <- convert(dfmat_para, to = "topicmodels")

set.seed(SEED)
tic("LDA fitting (k=5)")
lda_model <- LDA(dtm_para, method = "Gibbs", k = 5,
                 control = list(seed = SEED, burnin = 500, iter = 1000))
toc()
LDA fitting (k=5): 39.15 sec elapsed
terms(lda_model, 10)
      Topic 1      Topic 2        Topic 3    Topic 4    Topic 5       
 [1,] "prc"        "pla"          "china"    "china’s"  "defense"     
 [2,] "u.s"        "operations"   "aircraft" "national" "military"    
 [3,] "department" "taiwan"       "missile"  "prc’s"    "china"       
 [4,] "committee"  "capabilities" "air"      "china"    "security"    
 [5,] "launch"     "forces"       "nuclear"  "states"   "people’s"    
 [6,] "export"     "force"        "missiles" "military" "republic"    
 [7,] "satellite"  "support"      "sea"      "security" "developments"
 [8,] "select"     "military"     "first"    "united"   "report"      
 [9,] "united"     "joint"        "plan"     "strategy" "congress"    
[10,] "states"     "systems"      "force"    "prc"      "secretary"   
# --- 7b. Find optimal K using ldatuning ---

# library(ldatuning)
# tic("Finding optimal K")
# result <- FindTopicsNumber(
#   dtm_para,
#   topics = seq(from = 3, to = 15, by = 1),
#   metrics = c("Griffiths2004", "CaoJuan2009", "Arun2010", "Deveaud2014"),
#   method = "Gibbs",
#   control = list(seed = SEED),
#   verbose = TRUE
# )
# toc()
# FindTopicsNumber_plot(result)

# --- 7c. Structural Topic Model (STM) ---

# library(stm)
#
# dfmat_stm <- dfmat_DOD %>%
#   dfm_remove(stopwords("english")) %>%
#   dfm_trim(min_termfreq = 5)
#
# stm_input <- convert(dfmat_stm, to = "stm")
# stm_input$meta$president <- docvars(corp_DOD, "president")
# stm_input$meta$year <- as.numeric(stringr::str_extract(docnames(dfmat_DOD), "\\d{4}"))
#
# set.seed(SEED)
# k_result <- searchK(
#   documents = stm_input$documents,
#   vocab = stm_input$vocab,
#   K = 5:15,
#   prevalence = ~ president + s(year),
#   data = stm_input$meta,
#   verbose = TRUE
# )
# plot(k_result)
#
# stm_model <- stm(
#   documents = stm_input$documents,
#   vocab = stm_input$vocab,
#   K = 10,
#   prevalence = ~ president + s(year),
#   data = stm_input$meta,
#   seed = SEED
# )
#
# labelTopics(stm_model)
#
# prep <- estimateEffect(1:10 ~ president, stm_model,
#                        metadata = stm_input$meta)
# summary(prep)
#
# plot(topicCorr(stm_model))


## 8. SENTIMENT / POLARITY ANALYSIS

# Using Lexicoder Sentiment Dictionary (LSD2015)
dfmat_sent <- dfm_lookup(dfmat_DOD, dictionary = data_dictionary_LSD2015)

sent_df <- convert(dfmat_sent, to = "data.frame") %>%
  mutate(
    year      = as.numeric(stringr::str_extract(doc_id, "\\d{4}")),
    net       = positive - negative,
    total     = ntoken(corp_DOD),
    sent_rate = net / total,
    president = docvars(corp_DOD, "president")
  )

ggplot(sent_df, aes(x = year, y = sent_rate, color = president)) +
  geom_line(color = "gray60", linetype = "dashed") +
  geom_point(size = 3) +
  geom_smooth(method = "loess", se = TRUE, color = "gray40", alpha = 0.15) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  scale_color_manual(values = pres_colors) +
  labs(title = "Net Sentiment of DOD China Reports over Time",
       subtitle = "LSD2015: (positive − negative) / total tokens",
       x = "Year", y = "Net Sentiment Rate",
       color = "President") +
  theme_bw(base_size = 16) +
  theme(legend.position = "bottom", text = element_text(family = "Palatino"))
`geom_smooth()` using formula = 'y ~ x'
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database
Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
font family not found in Windows font database

## 9. SUMMARY OUTPUT

cat("\n========== ANALYSIS SUMMARY ==========\n")

========== ANALYSIS SUMMARY ==========
cat("Reports analyzed:", ndoc(corp_DOD), "\n")
Reports analyzed: 27 
cat("Year range:", min(DODq$year), "-", max(DODq$year), "\n")
Year range: 1999 - 2025 
cat("Presidents:", paste(unique(docvars(corp_DOD, "president")), collapse = ", "), "\n")
Presidents: Clinton, Bush, Obama, Trump, Biden, Trump II 
cat("DFM dimensions:", dim(dfmat_DOD), "\n")
DFM dimensions: 27 27464 
cat("Wordfish theta range:", round(range(tmod_wf$theta), 3), "\n")
Wordfish theta range: -4.208 0.772 
cat("=======================================\n")
=======================================