## ----include = FALSE----------------------------------------------------------
# Chunk defaults for this vignette: collapse = TRUE and "#>" as the prefix
# placed in front of printed output.
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE
)

## -----------------------------------------------------------------------------
# # install.packages("synthesisr") # if needed
# library(synthesisr)

## -----------------------------------------------------------------------------
# Load the development version of the package, starting the search from the
# current working directory (the default path, spelled out here).
devtools::load_all(path = ".")

## -----------------------------------------------------------------------------
# system.file() resolves paths inside the installed copy of synthesisr; the
# example bibliographic files shipped under "extdata" let you reproduce this
# vignette. The directory is named without a trailing slash because
# ?file.exists notes that directory names ending in a slash are not found on
# Windows, in which case system.file() would return "" and no files would be
# listed.
bibfiles <- list.files(
  system.file("extdata", package = "synthesisr"),
  full.names = TRUE
)

# read_refs() reads the listed bibliographic data files in one call
df_initial <- read_refs(bibfiles)

## -----------------------------------------------------------------------------
# First pass: deduplicate() with its default settings
# (matches on DOI by default).
df <- deduplicate(df_initial)

## -----------------------------------------------------------------------------
# Look for duplicated titles, with the to_lower and rm_punctuation options
# switched on.
possible_duplicates <- find_duplicates(
  df$title,
  rm_punctuation = TRUE,
  to_lower = TRUE
)

# Build the review table of candidate duplicates and print it with n = 6.
manual_checks <- review_duplicates(df$title, possible_duplicates)
print(manual_checks, n = 6)

## -----------------------------------------------------------------------------
# Reduce df using the groups found above.
df <- extract_unique_references(df, possible_duplicates)

## -----------------------------------------------------------------------------
# Second pass over the remaining titles, this time with the "string_osa"
# matching method (same to_lower / rm_punctuation options as before).
more_duplicates <- find_duplicates(
  df$title,
  method = "string_osa",
  rm_punctuation = TRUE,
  to_lower = TRUE
)

# Display the candidate matches so they can be checked by eye.
review_duplicates(df$title, more_duplicates)

## -----------------------------------------------------------------------------
# df$title[more_duplicates == 21]

## -----------------------------------------------------------------------------
# The two titles of one candidate group, shown verbatim; the ** markers
# surround the part that differs.
cli::cli_text(
  "An integrated occupancy and **space-use model** to predict abundance of imperfectly detected, territorial vertebrates"
)
cli::cli_text(
  "An integrated occupancy and **space-usemodel** to predict abundance of imperfectly detected, territorial vertebrates"
)

## -----------------------------------------------------------------------------
# df$title[more_duplicates == 140]

## -----------------------------------------------------------------------------
# The two titles of another candidate group, reproduced exactly as recorded
# (spelling differences are part of the example).
cli::cli_text(
  "Black-backed **three-toed wood-pecker**, Picoides arcticus, predation on Monochamus oregopensis(Coleoptera: Cerambycidae)"
)
cli::cli_text(
  "Black-backed **three-toed woodpecker**, Pieoides arcticus, predation on Monochamus oregonensis (Coleoptera, Cerambycidae)"
)

## -----------------------------------------------------------------------------
# df$title[more_duplicates == 99]

## -----------------------------------------------------------------------------
# Two similar titles whose highlighted years differ (2006 vs 2002); this is
# the group overridden in the next step.
cli::cli_text(
  "**2006** May species count of birds"
)
cli::cli_text(
  "**2002** May species count for birds"
)

## -----------------------------------------------------------------------------
# Override group 99 in the fuzzy-match result, then extract the final set of
# references using the corrected grouping.
new_duplicates <- override_duplicates(
  more_duplicates,
  99
)
results <- extract_unique_references(
  df,
  new_duplicates
)

## -----------------------------------------------------------------------------
# # # synthesisr can write the full dataset to a bibliographic file
# # # but in this example, we will just write the first citation
# # # we also want it to be a nice clean bibliographic file, so we remove NA data
# # # this makes it easier to view the output when working with a single article
# # citation <- df[1, !is.na(df[1,])]
# #
# # format_citation(citation)
# #
# # write_refs(citation,
# #   format = "bib",
# #   file = FALSE
# # )
#