benchmarking.Rmd
library(zddr)
As a base case, consider a trivial example of a dataframe containing the the cross multiplication of several long lists of variables.
library(magrittr)
start_time <- Sys.time()
tb<-tibble::tibble(
a = list(c( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20)),
#b = list(c(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)),
c = list(c(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60)),
d = list(c(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80)),
e = list(c(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100))
) %>%
tidyr::unnest(a) %>%
#tidyr::unnest(b) %>%
tidyr::unnest(c) %>%
tidyr::unnest(d) %>%
tidyr::unnest(e) %>%
dplyr::rowwise() %>%
dplyr::transmute(X = list(c(a,c,d,e)))
Sys.time() - start_time
#> Time difference of 4.363413 secs
tb
#> # A tibble: 160,000 x 1
#> # Rowwise:
#> X
#> <list>
#> 1 <dbl [4]>
#> 2 <dbl [4]>
#> 3 <dbl [4]>
#> 4 <dbl [4]>
#> 5 <dbl [4]>
#> 6 <dbl [4]>
#> 7 <dbl [4]>
#> 8 <dbl [4]>
#> 9 <dbl [4]>
#> 10 <dbl [4]>
#> # … with 159,990 more rows
pryr::object_size(tb)
#> Registered S3 method overwritten by 'pryr':
#> method from
#> print.bytes Rcpp
#> 24.3 MB
The tibble is an inefficient way to deal with sets, especially if there is a need to remove nonminimal sets. For example, say the above family add a single set of 1
, making all existing sets containing 1
nonminimal.
start_time <- Sys.time()
tb %>% dplyr::filter(!1 %in% X)
#> # A tibble: 152,000 x 1
#> # Rowwise:
#> X
#> <list>
#> 1 <dbl [4]>
#> 2 <dbl [4]>
#> 3 <dbl [4]>
#> 4 <dbl [4]>
#> 5 <dbl [4]>
#> 6 <dbl [4]>
#> 7 <dbl [4]>
#> 8 <dbl [4]>
#> 9 <dbl [4]>
#> 10 <dbl [4]>
#> # … with 151,990 more rows
Sys.time() - start_time
#> Time difference of 0.615664 secs
Perform this same operation using a ZDD from this package and note the space efficiency achieved.
library(zddr)
reset_zdd_store()
#> Resetting zdd_store.
start_time <- Sys.time()
zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
#zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)
#> Memory of ZDD Store: 0.25 MB, item count: 218 ( 1.15 KB/item)
#> Memory of ZDD Functions: 0.077 MB, item count: 358 ( 0.22 KB/item)
#> 5643917c867818eab1444c603aa23cfe : 160000 cutsets, min order: 4 max order: 4
#> 4-order
#> 160000
#>
#> More than 100 cutsets, not printing all those!
Sys.time() - start_time
#> Time difference of 0.3063161 secs
Now if the same number of elements are used, but in random order, note the difference in memory and time. This is a minor illustration of the importance of variable ordering.
reset_zdd_store()
#> Resetting zdd_store.
start_time <- Sys.time()
zdd_or( 85,44,24,41, 5,20,74,71,56,52,92,76,42,89, 65,16, 1,38,10,97) *
#zdd_or(21, 2,25,36,63,22,61,46,70,62,90,29,91,60, 79,83,51,39,81,58) *
zdd_or( 7,40,31,26,72,45,34,57,49,67,80,87,86,19, 4,59,17,14,75,53) *
zdd_or( 6,68,32,78,33,13,73,69,18,11,98,77,82,93, 95,23,99,96,50,88) *
zdd_or(43,15,94,84,66,64,54, 8,47,28,55,35,48,37,100, 9, 3,30,12,27)
#> Memory of ZDD Store: 1.15 MB, item count: 1016 ( 1.13 KB/item)
#> Memory of ZDD Functions: 0.923 MB, item count: 4773 ( 0.19 KB/item)
#> a6ba8b4945c86ab9205c731e2a68494a : 160000 cutsets, min order: 4 max order: 4
#> 4-order
#> 160000
#>
#> More than 100 cutsets, not printing all those!
Sys.time() - start_time
#> Time difference of 4.463271 secs
Check out how the size of the ZDD and the calculation time change when a single non-minimal ZDD is unioned with the first one above.
reset_zdd_store()
#> Resetting zdd_store.
start_time <- Sys.time()
zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
#zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) |
zdd(1) # 19*20^3+1 = 152,001
#> Memory of ZDD Store: 0.343 MB, item count: 296 ( 1.16 KB/item)
#> Memory of ZDD Functions: 0.143 MB, item count: 518 ( 0.28 KB/item)
#> 2d9f36c80c55b599caf00862b5468143 : 152001 cutsets, min order: 1 max order: 4
#> 1-order 4-order
#> 1 152000
#>
#> More than 100 cutsets, not printing all those!
Sys.time() - start_time
#> Time difference of 0.4411922 secs
Using the same example, add one additional union of a non-minimal ZDD and watch the change in ZDD complexity (memory) and calculation time.
reset_zdd_store()
#> Resetting zdd_store.
start_time <- Sys.time()
zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
#zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) |
zdd(1) | zdd(2) # 18*20^3+2 = 144,002
#> Memory of ZDD Store: 0.43 MB, item count: 373 ( 1.15 KB/item)
#> Memory of ZDD Functions: 0.177 MB, item count: 675 ( 0.26 KB/item)
#> e68e61973d287829f8ff1457c1d149cb : 144002 cutsets, min order: 1 max order: 4
#> 1-order 4-order
#> 2 144000
#>
#> More than 100 cutsets, not printing all those!
Sys.time() - start_time
#> Time difference of 0.6408699 secs
There is a slight reduction in time and space needs when minimizing a large family of sets against a higher ranked variable. (Compare to the original minimization example above.)
reset_zdd_store()
#> Resetting zdd_store.
start_time <- Sys.time()
(zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
#zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) |
zdd(90) # 19*20^3+1 = 152,001
#> Memory of ZDD Store: 0.267 MB, item count: 229 ( 1.17 KB/item)
#> Memory of ZDD Functions: 0.114 MB, item count: 382 ( 0.3 KB/item)
#> 6e02f648e12b0a1d609061b52aea62cb : 152001 cutsets, min order: 1 max order: 4
#> 1-order 4-order
#> 1 152000
#>
#> More than 100 cutsets, not printing all those!
Sys.time() - start_time
#> Time difference of 0.3703039 secs
reset_zdd_store()
#> Resetting zdd_store.
start_time <- Sys.time()
(zdd_or( 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20) *
#zdd_or(21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) *
zdd_or(41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60) *
zdd_or(61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80) *
zdd_or(81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100)) |
zdd_and(1,90)
#> Memory of ZDD Store: 0.334 MB, item count: 288 ( 1.16 KB/item)
#> Memory of ZDD Functions: 0.142 MB, item count: 514 ( 0.28 KB/item)
#> 426b3c4daedc32ea00c4371b4f683b99 : 159601 cutsets, min order: 2 max order: 4
#> 2-order 4-order
#> 1 159600
#>
#> More than 100 cutsets, not printing all those!
Sys.time() - start_time
#> Time difference of 0.4271069 secs