Skip to content

Error in data.frame(verb = verb, redux_fn = NA, predicate = name.of.predicate, : arguments imply differing number of rows #88

@DrAndiLowe

Description

@DrAndiLowe

Hi,

I've encountered a possible bug in assertr. I can't be certain, because I'm unable to provide a reproducible example: the data is covered by NDA. My code looks like this:

# Run a single assertr check on `dat` inside a chain_start/chain_end chain
# and assign the result of the whole pipeline back to `dat`.
dat %>% 
  chain_start %>%
  # Check the `SUB-CATEGORY` column against the values listed in in_set();
  # NA_character_ is listed as one of the set members.
  assert(
    in_set(
      NA_character_,
      "PEP N-R",
      "PEP N",
      "PEP SN",
      "PEP RO",
      "PEP NG",
      "PEP L",
      "PEP IO",
      "PEP SN-R",
      "PEP L-R",
      "PEP N-A",
      "PEP RO-R",
      "SOE",
      "PEP SN-A",
      "PEP NG-A",
      "SIE",
      "PEP NG-R"
    ), 
    `SUB-CATEGORY`
  ) %>%
  # filter_bad is supplied to chain_end as its error_fun argument.
  chain_end(error_fun = filter_bad) %>% 
  # `{.}` passes the piped value through so that `->` can assign it to `dat`.
  {.} -> dat

filter_bad is the function mentioned in #86:

# Error handler intended for use as `error_fun` in an assertr chain: logs
# every error message, removes the rows flagged by the errors from `data`
# and returns what is left.
#
# Args:
#   list_of_errors: list of error objects; each element is read for its
#     `message` and `error_df` fields.
#   data: the data frame that was being checked. Required, because the
#     offending rows are removed from it.
#   ...: accepted for compatibility with the caller; not used.
#
# Returns:
#   `data` without the offending rows. The combined error table is attached
#   to the result as the attribute "data_errors".
filter_bad <- function(list_of_errors, data = NULL, ...) {
  if (is.null(data)) {
    stop(
      "filter_bad() needs `data` to remove the offending rows from",
      call. = FALSE
    )
  }

  # Errors that are still attached to the data frame are put in front of the
  # ones passed in. (The original assigned to an undefined `errors` object
  # here, so this branch failed and the merged list was never used.)
  attached_errors <- attr(data, "assertr_errors")
  if (!is.null(attached_errors)) {
    list_of_errors <- append(attached_errors, list_of_errors)
  }

  # Log each message twice (message() and print()) for output logging,
  # then stack the detailed per-error tables into one data frame.
  error_df <- suppressWarnings(
    list_of_errors %>%
      furrr::future_map(
        function(x) {
          message(x$message)
          print(x$message)
          x$error_df
        }
      ) %>%
      dplyr::bind_rows()
  )

  # With no errors (or error tables without an `index` column) there is
  # nothing to drop; avoid failing on the missing column.
  indices <- if ("index" %in% names(error_df)) {
    error_df[["index"]]
  } else {
    integer(0)
  }

  # The indices are treated as row positions, so match them against a
  # positional row id rather than against the row names, which need not be
  # 1..n on a plain data.frame.
  data <- data %>%
    tibble::rowid_to_column(var = "rowname") %>% # Temporary positional index
    tidylog::filter(!(rowname %in% indices)) %>% # Drop bad rows, log action
    dplyr::select(-rowname) # Remove temporary index

  # Keep the detailed error information with the cleaned data.
  attr(data, "data_errors") <- error_df
  data
}

Here's the traceback following the error:

Error in data.frame(verb = verb, redux_fn = NA, predicate = name.of.predicate,  :
  arguments imply differing number of rows: 1, 3, 97
> traceback()
13: stop(gettextf("arguments imply differing number of rows: %s",
        paste(unique(nrows), collapse = ", ")), domain = NA)
12: data.frame(verb = verb, redux_fn = NA, predicate = name.of.predicate,
        column = column, index = unname(index.of.violations), value = unname(offending.elements))
11: make.assertr.assert.error("assert", name.of.predicate, col.name,
        num.violations, index.of.violations, offending.elements)
10: FUN(X[[i]], ...)
9: lapply(colnames(log.mat), function(col.name) {
       col <- log.mat[, col.name]
       num.violations <- sum(!col)
       if (num.violations == 0)
           return(NULL)
       index.of.violations <- which(!col)
       offending.elements <- sub.frame[[col.name]][index.of.violations]
       an_error <- make.assertr.assert.error("assert", name.of.predicate,
           col.name, num.violations, index.of.violations, offending.elements)
       return(an_error)
   })
8: assert(., in_set(NA_character_, "PEP N-R", "PEP N", "PEP SN",
       "PEP RO", "PEP NG", "PEP L", "PEP IO", "PEP SN-R", "PEP L-R",
       "PEP N-A", "PEP RO-R", "SOE", "PEP SN-A", "PEP NG-A", "SIE",
       "PEP NG-R"), `SUB-CATEGORY`)
7: function_list[[i]](value)
6: freduce(value, `_function_list`)
5: `_fseq`(`_lhs`)
4: eval(quote(`_fseq`(`_lhs`)), env, env)
3: eval(quote(`_fseq`(`_lhs`)), env, env)
2: withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
1: dat %>% chain_start %>% assert(in_set(NA_character_, "PEP N-R",
       "PEP N", "PEP SN", "PEP RO", "PEP NG", "PEP L", "PEP IO",
       "PEP SN-R", "PEP L-R", "PEP N-A", "PEP RO-R", "SOE", "PEP SN-A",
       "PEP NG-A", "SIE", "PEP NG-R"), `SUB-CATEGORY`) %>% chain_end(error_fun = filter_bad) %>%
       {
           .
       }

It seems that assertr is trying to construct a data.frame of error information from vectors of differing lengths. There could be something quirky in my data, but it's difficult for me to tell: the data contains over four million rows, and I don't know what could be triggering this behaviour. In any case, some input causes an uncaught crash inside assertr itself. Can you investigate, please?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions