Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ Authors@R: c(person("Thomas J.", "Leeper", role = c("aut"),
person("Antoine", "Sachet", role = c("aut", "cre"),
email = "antoine.sac@gmail.com"),
person("Dave", "Kincaid", role = c("ctb"),
email = "dave@kincaidlabs.ai"))
email = "dave@kincaidlabs.ai"),
person("Robert", "Norberg", role = c("ctb"),
email = "Robert.Norberg@moffitt.org"))
Description: Client for 'AWS Comprehend' <https://aws.amazon.com/comprehend>, a cloud natural language processing service that can perform a number of quantitative text analyses, including language detection, sentiment analysis, and feature extraction.
License: GPL (>= 2)
URL: https://github.com/cloudyr/aws.comprehend
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ export(detect_medical_phi)
export(detect_phrases)
export(detect_sentiment)
export(detect_syntax)
export(infer_icd10)
export(infer_rxnorm)
export(infer_snowmed_ct)
import(httr)
importFrom(aws.signature,locate_credentials)
importFrom(aws.signature,signature_v4_auth)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# aws.comprehend (development version)

* Added `infer_icd10`, `infer_rxnorm`, and `infer_snowmed_ct` functions to call Comprehend Medical's ontology linking APIs.

# aws.comprehend 0.2.1

* Released on CRAN 2020-03-18
Expand Down
22 changes: 22 additions & 0 deletions R/infer_icd10.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' @title Detect conditions and associated ICD10 codes in a source medical text
#' @description Detect possible medical conditions as entities and link them to ICD10 codes in a source medical text.
#' @param text A character string containing the text to analyze. The value is passed through unchanged in a single request; this function does not itself iterate over the elements of a character vector, so call it once per text (see examples).
#' @param language A character string containing a two-letter language code. Currently only \dQuote{en} is supported.
#' @param \dots Additional arguments passed to \code{\link{comprehendHTTP}}.
#' @return The \code{Entities} element of the response (typically a data frame with one row per detected entity) with a leading \code{Index} column that is always 0. If the response contains no entities, a zero-row data frame holding only the \code{Index} column is returned.
#' @references \href{https://docs.aws.amazon.com/comprehend-medical/latest/dev/ontology-icd10.html}{AWS Comprehend Medical Developer Guide}
#' @examples
#' \dontrun{
#'   # simple example
#'   infer_icd10("Mrs. Smith comes in today complaining of shortness of breath.")
#'
#'   # several texts: one request per element
#'   txt <- c("Mrs. Smith comes in today.",
#'            "She is complaining of shortness of breath.")
#'   lapply(txt, infer_icd10)
#' }
#' @export
infer_icd10 <- function(text, language = "en", ...) {
  bod <- list(Text = text, LanguageCode = language)
  out <- comprehendHTTP(action = "InferICD10CM", body = bod, service = "comprehendmedical", ...)
  entities <- out$Entities
  # With nothing to bind to, cbind() would hand back a spurious one-row
  # matrix holding only Index = 0, so report "no entities" explicitly.
  if (NROW(entities) == 0L) {
    return(data.frame(Index = numeric(0)))
  }
  cbind(Index = 0, entities)
}
22 changes: 22 additions & 0 deletions R/infer_rxnorm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' @title Identify medications in a source medical text
#' @description Identify medications in a source medical text and link them to RxCUI codes
#' @param text A character string containing the text to analyze. The value is passed through unchanged in a single request; this function does not itself iterate over the elements of a character vector, so call it once per text (see examples).
#' @param language A character string containing a two-letter language code. Currently only \dQuote{en} is supported.
#' @param \dots Additional arguments passed to \code{\link{comprehendHTTP}}.
#' @return The \code{Entities} element of the response (typically a data frame with one row per detected entity) with a leading \code{Index} column that is always 0. If the response contains no entities, a zero-row data frame holding only the \code{Index} column is returned.
#' @references \href{https://docs.aws.amazon.com/comprehend-medical/latest/dev/ontology-RxNorm.html}{AWS Comprehend Medical Developer Guide}
#' @examples
#' \dontrun{
#'   # simple example
#'   infer_rxnorm("fluoride topical ( fluoride 1.1 % topical gel ) 1 application Topically daily. Patient is not on warfarin.")
#'
#'   # several texts: one request per element
#'   txt <- c("fluoride topical ( fluoride 1.1 % topical gel ) 1 application Topically daily.",
#'            "Patient is not on warfarin.")
#'   lapply(txt, infer_rxnorm)
#' }
#' @export
infer_rxnorm <- function(text, language = "en", ...) {
  bod <- list(Text = text, LanguageCode = language)
  out <- comprehendHTTP(action = "InferRxNorm", body = bod, service = "comprehendmedical", ...)
  entities <- out$Entities
  # With nothing to bind to, cbind() would hand back a spurious one-row
  # matrix holding only Index = 0, so report "no entities" explicitly.
  if (NROW(entities) == 0L) {
    return(data.frame(Index = numeric(0)))
  }
  cbind(Index = 0, entities)
}
22 changes: 22 additions & 0 deletions R/infer_snowmed_ct.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#' @title Identify clinical terms in a source medical text
#' @description Identify clinical terms in a source medical text and link them to Systematized Nomenclature of Medicine, Clinical Terms (SNOMED CT) codes
#' @param text A character string containing the text to analyze. The value is passed through unchanged in a single request; this function does not itself iterate over the elements of a character vector, so call it once per text (see examples).
#' @param language A character string containing a two-letter language code. Currently only \dQuote{en} is supported.
#' @param \dots Additional arguments passed to \code{\link{comprehendHTTP}}.
#' @return The \code{Entities} element of the response (typically a data frame with one row per detected entity) with a leading \code{Index} column that is always 0. If the response contains no entities, a zero-row data frame holding only the \code{Index} column is returned.
#' @references \href{https://docs.aws.amazon.com/comprehend-medical/latest/dev/ontology-linking-snomed.html}{AWS Comprehend Medical Developer Guide}
#' @examples
#' \dontrun{
#'   # simple example
#'   infer_snowmed_ct("BHEENT : Boggy inferior turbinates. No oropharyngeal lesion.")
#'
#'   # several texts: one request per element
#'   txt <- c("BHEENT : Boggy inferior turbinates.",
#'            "No oropharyngeal lesion.")
#'   lapply(txt, infer_snowmed_ct)
#' }
#' @export
infer_snowmed_ct <- function(text, language = "en", ...) {
  bod <- list(Text = text, LanguageCode = language)
  out <- comprehendHTTP(action = "InferSNOMEDCT", body = bod, service = "comprehendmedical", ...)
  entities <- out$Entities
  # With nothing to bind to, cbind() would hand back a spurious one-row
  # matrix holding only Index = 0, so report "no entities" explicitly.
  if (NROW(entities) == 0L) {
    return(data.frame(Index = numeric(0)))
  }
  cbind(Index = 0, entities)
}
34 changes: 34 additions & 0 deletions man/infer_icd10.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/infer_rxnorm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/infer_snowmed_ct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion tests/testthat/helper-medical_mocks.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ generate_mocks_medical <- function() {
actions <- list(
"DetectPHI" = body$medical,
"DetectEntities" = body$medical,
"DetectEntitiesV2" = body$medical
"DetectEntitiesV2" = body$medical,
"InferICD10CM" = body$medical,
"InferRxNorm" = body$medical,
"InferSNOMEDCT" = body$medical
)

mapply(generate_mock_medical, names(actions), actions)
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
35 changes: 35 additions & 0 deletions tests/testthat/test-infer_icd10.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
context("infer_icd10")

# Request fixture supplied by the test helpers.
body <- get_request_body()

test_that("infer_icd10 works on single string", {
  output <- with_mock(
    comprehendHTTP = mock_comprehendHTTP_medical,
    infer_icd10(text = body$medical$Text,
                language = body$medical$LanguageCode)
  )

  # Each entity carries its own data frame of candidate codes; stack them.
  icd10_codes <- Reduce(rbind, output$ICD10CMConcepts)
  # Built explicitly rather than parsed from a tab-delimited literal, so the
  # fixture does not depend on invisible tab characters surviving edits.
  expected_icd10_codes <- data.frame(
    Code = c("R45.83", "G47.9", "Z72.821", "Y93.84", "F51.9"),
    Description = c(
      "Excessive crying of child, adolescent or adult",
      "Sleep disorder, unspecified",
      "Inadequate sleep hygiene",
      "Activity, sleeping",
      "Sleep disorder not due to a substance or known physiological condition, unspecified"
    ),
    Score = c(
      0.737255275249481,
      0.592359900474548,
      0.291808724403381,
      0.153297811746597,
      0.146037772297859
    ),
    stringsAsFactors = FALSE
  )
  expect_similar(icd10_codes, expected_icd10_codes)

  # These columns are lists of data.frames - complicated to test
  output$Attributes <- NULL
  output$ICD10CMConcepts <- NULL
  output$Traits <- NULL

  expected <- data.frame(
    Index = 0L,
    BeginOffset = 45L,
    Category = "MEDICAL_CONDITION",
    EndOffset = 61L,
    Id = 1L,
    Score = 0.678973436355591,
    Text = "Sleeping trouble",
    Type = "DX_NAME",
    stringsAsFactors = FALSE
  )

  expect_similar(output, expected)

})
36 changes: 36 additions & 0 deletions tests/testthat/test-infer_rxnorm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
context("infer_rxnorm")

# Request fixture supplied by the test helpers.
body <- get_request_body()

test_that("infer_rxnorm works on single string", {
  output <- with_mock(
    comprehendHTTP = mock_comprehendHTTP_medical,
    infer_rxnorm(text = body$medical$Text,
                 language = body$medical$LanguageCode)
  )

  # Each entity carries its own data frame of candidate codes; stack them.
  rxcui_codes <- Reduce(rbind, output$RxNormConcepts)
  # Built explicitly rather than parsed from a tab-delimited literal, so the
  # fixture does not depend on invisible tab characters surviving edits.
  # RxCUI codes are kept as character, as the previous colClasses required.
  expected_rxcui_codes <- data.frame(
    Code = c("2599", "884185", "884173", "216094", "884225"),
    Description = c(
      "clonidine",
      "clonidine hydrochloride 0.2 mg oral tablet",
      "clonidine hydrochloride 0.1 mg oral tablet",
      "clinidine",
      "10 ml clonidine hydrochloride 0.5 mg/ml injection"
    ),
    Score = c(
      0.840667128562927,
      0.208982422947884,
      0.189939811825752,
      0.129013881087303,
      0.0852060243487358
    ),
    stringsAsFactors = FALSE
  )
  expect_similar(rxcui_codes, expected_rxcui_codes)

  # These columns are lists of data.frames - complicated to test
  output$Attributes <- NULL
  output$RxNormConcepts <- NULL
  output$Traits <- NULL

  expected <- data.frame(
    Index = 0L,
    BeginOffset = 83L,
    Category = "MEDICATION",
    EndOffset = 92L,
    Id = 1L,
    Score = 0.997781932353973,
    Text = "Clonidine",
    Type = "GENERIC_NAME",
    stringsAsFactors = FALSE
  )

  expect_similar(output, expected)

})
36 changes: 36 additions & 0 deletions tests/testthat/test-infer_snowmed_ct.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
context("infer_snowmed_ct")

# Request fixture supplied by the test helpers.
body <- get_request_body()

test_that("infer_snowmed_ct works on single string", {
  output <- with_mock(
    comprehendHTTP = mock_comprehendHTTP_medical,
    infer_snowmed_ct(text = body$medical$Text,
                     language = body$medical$LanguageCode)
  )

  # Each entity carries its own data frame of candidate codes; stack them.
  snowmed_codes <- Reduce(rbind, output$SNOMEDCTConcepts)
  # Built explicitly rather than parsed from a tab-delimited literal, so the
  # fixture does not depend on invisible tab characters surviving edits.
  # SNOMED CT codes are kept as character, as the previous colClasses required.
  expected_snowmed_codes <- data.frame(
    Code = c("301345002", "39898005", "26677001", "89675003", "248259004"),
    Description = c(
      "Difficulty sleeping (finding)",
      "Sleep disorder (disorder)",
      "Sleep pattern disturbance (finding)",
      "Sleep terror disorder (disorder)",
      "Symptoms interfere with sleep (disorder)"
    ),
    Score = c(
      0.0105650639161468,
      0.00971199851483107,
      0.00622155796736479,
      0.00587156973779202,
      0.00511229783296585
    ),
    stringsAsFactors = FALSE
  )
  expect_similar(snowmed_codes, expected_snowmed_codes)

  # These columns are lists of data.frames - complicated to test
  output$Attributes <- NULL
  output$SNOMEDCTConcepts <- NULL
  output$Traits <- NULL

  expected <- data.frame(
    Index = 0L,
    BeginOffset = 45L,
    Category = "MEDICAL_CONDITION",
    EndOffset = 61L,
    Id = 1L,
    Score = 0.678973436355591,
    Text = "Sleeping trouble",
    Type = "DX_NAME",
    stringsAsFactors = FALSE
  )

  expect_similar(output, expected)

})