Skip to content

Commit

Permalink
fix: more fixes for str_slice() (#162)
Browse files Browse the repository at this point in the history
* init

* reduce n of tests
  • Loading branch information
etiennebacher authored Jan 7, 2025
1 parent 102ad5e commit 6d40aed
Show file tree
Hide file tree
Showing 5 changed files with 402 additions and 392 deletions.
10 changes: 8 additions & 2 deletions R/funs-string.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ pl_str_sub_stringr <- function(string, start, end = NULL) {
length
)$otherwise(2000)

foo2 <- pl$when(start_is_zero & (end_is_null | end$abs() <= len_string))$then(
len_string - end - 1
foo2 <- pl$when(start_is_zero & end > 0 & end <= len_string)$then(
end
)$otherwise(2000)

foo3 <- pl$when(start >= 0 & end_is_null)$then(len_string - start)$otherwise(
Expand Down Expand Up @@ -282,6 +282,9 @@ pl_str_sub_stringr <- function(string, start, end = NULL) {
when(start_is_zero & end > 0 & end <= len_string)$
then(string$str$slice(0, foo2))$

when(start_is_zero & end > 0 & end > len_string)$
then(string)$

when(start_is_zero & end < 0 & end$abs() <= len_string)$
then(string$str$slice(0, foo6))$

Expand Down Expand Up @@ -318,6 +321,9 @@ pl_str_sub_stringr <- function(string, start, end = NULL) {
when(start < 0 & end < 0 & start$abs() > len_string & end$abs() > len_string)$
then(pl$lit(""))$

when(start < 0 & end_is_zero & start$abs() > len_string)$
then(pl$lit(""))$

when(start > 0 & end > 0 & start > end)$then(pl$lit(""))
}

Expand Down
288 changes: 144 additions & 144 deletions tests/testthat/test-funs-string-lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,100 +3,100 @@
Sys.setenv('TIDYPOLARS_TEST' = TRUE)

test_that("paste() and paste0() work", {
for_all(
tests = 40,
string = character_(any_na = TRUE),
separator = character_(len = 1),
property = function(string, separator) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_lazy(
mutate(test, foo = paste(x1, "he")) |>
pull(foo),
mutate(test_df, foo = paste(x1, "he")) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = paste(x1, "he", sep = separator)) |>
pull(foo),
mutate(test_df, foo = paste(x1, "he", sep = separator)) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = paste0(x1, "he")) |>
pull(foo),
mutate(test_df, foo = paste0(x1, "he")) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = paste0(x1, "he", x1)) |>
pull(foo),
mutate(test_df, foo = paste0(x1, "he", x1)) |>
pull(foo)
)
}
)
for_all(
tests = 40,
string = character_(any_na = TRUE),
separator = character_(len = 1),
property = function(string, separator) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_lazy(
mutate(test, foo = paste(x1, "he")) |>
pull(foo),
mutate(test_df, foo = paste(x1, "he")) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = paste(x1, "he", sep = separator)) |>
pull(foo),
mutate(test_df, foo = paste(x1, "he", sep = separator)) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = paste0(x1, "he")) |>
pull(foo),
mutate(test_df, foo = paste0(x1, "he")) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = paste0(x1, "he", x1)) |>
pull(foo),
mutate(test_df, foo = paste0(x1, "he", x1)) |>
pull(foo)
)
}
)
})

patrick::with_parameters_test_that(
"several non-regex functions work",
{
for_all(
tests = 40,
string = character_(any_na = TRUE),
property = function(string) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

pl_code <- paste0("mutate(test, foo = ", fun, "(string)) |> pull(foo)")
tv_code <- paste0(
"mutate(test_df, foo = ",
fun,
"(string)) |> pull(foo)"
)

expect_equal_lazy(
eval(parse(text = pl_code)),
eval(parse(text = tv_code)),
)
}
)
},
fun = c("str_to_upper", "str_to_lower", "str_length", "str_squish")
"several non-regex functions work",
{
for_all(
tests = 40,
string = character_(any_na = TRUE),
property = function(string) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

pl_code <- paste0("mutate(test, foo = ", fun, "(string)) |> pull(foo)")
tv_code <- paste0(
"mutate(test_df, foo = ",
fun,
"(string)) |> pull(foo)"
)

expect_equal_lazy(
eval(parse(text = pl_code)),
eval(parse(text = tv_code)),
)
}
)
},
fun = c("str_to_upper", "str_to_lower", "str_length", "str_squish")
)

test_that("str_trim() works", {
for_all(
tests = 40,
string = character_(any_na = TRUE),
side = quickcheck::one_of(
constant("both"),
constant("left"),
constant("right")
),
property = function(string, side) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_lazy(
mutate(test, foo = str_trim(x1)) |>
pull(foo),
mutate(test_df, foo = str_trim(x1)) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = str_trim(x1, side = side)) |>
pull(foo),
mutate(test_df, foo = str_trim(x1, side = side)) |>
pull(foo)
)
}
)
for_all(
tests = 40,
string = character_(any_na = TRUE),
side = quickcheck::one_of(
constant("both"),
constant("left"),
constant("right")
),
property = function(string, side) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_lazy(
mutate(test, foo = str_trim(x1)) |>
pull(foo),
mutate(test_df, foo = str_trim(x1)) |>
pull(foo)
)

expect_equal_lazy(
mutate(test, foo = str_trim(x1, side = side)) |>
pull(foo),
mutate(test_df, foo = str_trim(x1, side = side)) |>
pull(foo)
)
}
)
})

# TODO: Problem is that I don't have a way to check that length of string is the
Expand Down Expand Up @@ -134,68 +134,68 @@ test_that("str_trim() works", {
# })

test_that("str_dup() works", {
for_all(
tests = 100,
string = character_(any_na = TRUE),
# Very high numbers crash the session, I guess because of stringr
times = numeric_bounded(-10000, 10000, any_na = TRUE),
property = function(string, times) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_or_both_error(
mutate(test, foo = str_dup(x1, times = times)) |>
pull(foo),
mutate(test_df, foo = str_dup(x1, times = times)) |>
pull(foo)
)
}
)
for_all(
tests = 100,
string = character_(any_na = TRUE),
# Very high numbers crash the session, I guess because of stringr
times = numeric_bounded(-10000, 10000, any_na = TRUE),
property = function(string, times) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_or_both_error(
mutate(test, foo = str_dup(x1, times = times)) |>
pull(foo),
mutate(test_df, foo = str_dup(x1, times = times)) |>
pull(foo)
)
}
)
})

test_that("str_sub() works", {
for_all(
tests = 200,
string = character_(any_na = TRUE),
start = numeric_(any_na = TRUE),
end = numeric_(any_na = TRUE),
property = function(string, start, end) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_or_both_error(
mutate(test, foo = str_sub(x1, start, end)) |>
pull(foo),
mutate(test_df, foo = str_sub(x1, start, end)) |>
pull(foo)
)
}
)
for_all(
tests = 200,
string = character_(any_na = TRUE),
start = numeric_(any_na = TRUE),
end = numeric_(any_na = TRUE),
property = function(string, start, end) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_or_both_error(
mutate(test, foo = str_sub(x1, start, end)) |>
pull(foo),
mutate(test_df, foo = str_sub(x1, start, end)) |>
pull(foo)
)
}
)
})

test_that("substr() works", {
# substr() doesn't error with different lengths but polars does. I don't want
# this weird case to prevent quickcheck to run, especially since this is a
# weird behavior in base R and we're more conservative on this.
length <- sample(1:10, 1)

for_all(
tests = 200,
string = character_(any_na = TRUE, len = length),
start = numeric_(any_na = TRUE, len = length),
end = numeric_(any_na = TRUE, len = length),
property = function(string, start, end) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_or_both_error(
mutate(test, foo = substr(x1, start, end)) |>
pull(foo),
mutate(test_df, foo = substr(x1, start, end)) |>
pull(foo)
)
}
)
# substr() doesn't error with different lengths but polars does. I don't want
# this weird case to prevent quickcheck to run, especially since this is a
# weird behavior in base R and we're more conservative on this.
length <- sample(1:10, 1)

for_all(
tests = 200,
string = character_(any_na = TRUE, len = length),
start = numeric_(any_na = TRUE, len = length),
end = numeric_(any_na = TRUE, len = length),
property = function(string, start, end) {
test_df <- data.frame(x1 = string)
test <- pl$LazyFrame(x1 = string)

expect_equal_or_both_error(
mutate(test, foo = substr(x1, start, end)) |>
pull(foo),
mutate(test_df, foo = substr(x1, start, end)) |>
pull(foo)
)
}
)
})

Sys.setenv('TIDYPOLARS_TEST' = FALSE)
Loading

0 comments on commit 6d40aed

Please sign in to comment.