问题
I have a recursive function, kindly explained to me by @thothal here. It allows me to recursively collect dataframes by looking up each character string found in the parent dataframe. With the example I provided, this works great.
However, I am now working on further tables where the elements in the child are present in the parent and vice versa. This leads to an infinite loop in the recursive function.
To repeat the original question with changes:
# Starting table: the first column holds the IDs to look up, the second a
# text label for each ID.
Numsdf1 <- c("C123", "C456", "C789")
Textdf1 <- c("Harry", "Bobby", "Terry")
df1 <- data.frame(
  Numsdf1 = Numsdf1,
  Textdf1 = Textdf1,
  stringsAsFactors = FALSE
)
The second dataframe is the result of looking up the string "C123"
# Table stored under the name "C123". Its first ID is "C123" again, so a
# lookup by name from this table leads straight back to this same table.
# NOTE(review): the surrounding text looks up "Coo" and expects
# C123 -> Coo, but "Coo" is not among these IDs -- confirm the intended
# example data.
NumsC123 <- c("C123", "Noo", "Too")
TextC123 <- c("Tim", "Slim", "Shim")
C123 <- data.frame(
  NumsC123 = NumsC123,
  TextC123 = TextC123,
  stringsAsFactors = FALSE
)
The third dataframe is a result of looking up "Coo"
# Table stored under the name "Coo"; none of its IDs is defined as a
# table in this example.
NumsCoo <- c("S144", "S199", "S743")
TextCoo <- c("Ellie", "Bellie", "Tellie")
Coo <- data.frame(
  NumsCoo = NumsCoo,
  TextCoo = TextCoo,
  stringsAsFactors = FALSE
)
The fourth is the result of looking up "Noo"
# Table stored under the name "Noo"; none of its IDs is defined as a
# table in this example.
NumsNoo <- c("GHS", "THE", "PAA")
TextNoo <- c("Front", "Bunt", "Shunt")
Noo <- data.frame(
  NumsNoo = NumsNoo,
  TextNoo = TextNoo,
  stringsAsFactors = FALSE
)
The solution originally was:
library(tidyverse)
# Recursively collect the data frames reachable from `df`.
#
# Every value in the first column of `df` is treated as the name of a
# variable in the global environment. When such a variable exists, the
# search continues inside it; otherwise `df` itself (columns renamed to
# col1/col2) is returned for that value.
#
# Nothing records which names were already visited, so tables that refer
# to each other keep recursing -- this is the loop the question is about.
#
# Returns a list with one entry per value in the first column; each entry
# is a list of data frames.
get_all_dfs <- function(df) {
  visit <- function(elem) {
    print(paste("Looking for element", elem))
    # mget() instead of get(): it supports `ifnotfound`, even though only
    # a single name is requested here.
    hit <- mget(elem, envir = .GlobalEnv, ifnotfound = NA)
    if (is.na(hit)) {
      # Dead end: report the table this value was found in.
      return(list(setNames(df, c("col1", "col2"))))
    }
    # Strip one nesting level so the caller sees a flat list of tables.
    unlist(get_all_dfs(hit[[1]]), recursive = FALSE)
  }
  lapply(df[, 1], visit)
}
# Bind every collected table into one data frame and drop duplicate rows.
unique(flatten_dfr(get_all_dfs(df1)))
This means that when I run the function I get a loop which I can't break out of. So instead of the intended result of:
C123 -> Coo -> S144 -> S199 -> S743 -> Noo -> GHS -> THE -> PAA -> Too -> C456 -> C789
I get
C123 -> Coo -> C123 -> Coo -> C123 etc.
What can I do to prevent this?
Update 1
I implemented the solution from @thothal. The problem I had was that the lookup function I use returns a dataframe rather than a list, so I created a list to store the Global environment too. However, the loop still occurs. Here is the updated code:
# Recursive worker from "Update 1": for every value in df$relatedIdEx, look
# the value up with myGIConcepts() and recurse into the result.
#
# df      data frame with a `relatedIdEx` column of lookup keys.
# my_env  environment meant to track the tables that are still unvisited.
#
# NOTE(review): my_env is never consulted for the lookup itself -- it is
# only wrapped into a list and passed to rm(). Removing a name from it
# therefore cannot prevent the same key from being looked up again, which
# matches the report that the loop still occurs.
get_all_dfs_rec <- function(df, my_env) {
lapply(df$relatedIdEx, function(elem) {
print(paste("Looking for element", elem))
# Lookup goes through an external helper (defined outside this snippet).
next_df <- myGIConcepts(elem) # per the author: this returns a data frame
# Wrap the result and the environment in a list, so next_df[[1]] below is
# the looked-up data frame.
next_df<-list(next_df,my_env) # environment kept alongside in the list
# NOTE(review): next_df now has two elements, so is.na(next_df) has length
# 2; `if` requires a length-one condition (an error in R >= 4.2, a warning
# before that). How myGIConcepts() signals "not found" is not visible
# here -- confirm.
if (!is.na(next_df)) {
# Drop the binding named `elem` from my_env.
# NOTE(review): rm() warns when no such binding exists -- confirm the key
# is always present.
rm(list = elem, envir = my_env)
# Recurse into the looked-up table; unlist(., FALSE) strips one list level.
unlist(get_all_dfs_rec(next_df[[1]], my_env), FALSE)
} else {
# No further table: return the current one with columns renamed.
list(setNames(df, c("col1", "col2")))
}
})
}
# Entry point: collect every table reachable from `df_start`.
#
# Copies all data frames found in the global environment into a private
# environment, hands that environment to get_all_dfs_rec() as the set of
# still-unvisited tables, then binds the collected tables into a single
# data frame and removes duplicate rows.
#
# df_start  data frame whose lookup keys start the search.
get_all_dfs <- function(df_start) {
  ## create a new environment
  my_env <- new.env()
  ## and add all data frames from the global environment to it
  walk(ls(.GlobalEnv), function(obj_name) {
    obj <- get(obj_name, envir = .GlobalEnv)
    # inherits() gives a single TRUE/FALSE even for objects with several
    # classes (e.g. tibbles); `class(obj) == "data.frame"` does not.
    if (inherits(obj, "data.frame")) {
      my_env[[obj_name]] <- obj
    }
  })
  flatten_dfr(get_all_dfs_rec(df_start, my_env)) %>% unique()
}
回答1:
You can put all of your data frames in their own environment and, once one of them has been found, remove it from there so that it cannot be visited a second time:
# Recursively collect data frames, visiting each named table at most once.
#
# df      data frame whose first column holds lookup keys; it is expected
#         to have two columns (they are renamed to col1/col2 on return).
# my_env  environment holding the tables that have not been visited yet,
#         keyed by name. It is modified: a table is removed as soon as it
#         is found, which is what stops mutually referencing tables from
#         recursing forever.
#
# Returns a list with one entry per key; each entry is a list of data
# frames.
get_all_dfs_rec <- function(df, my_env) {
  # `[[1]]` always yields the column itself (`df[, 1]` keeps a tibble as a
  # one-column table), and as.character() makes factor keys usable as
  # names for mget().
  keys <- as.character(df[[1]])
  lapply(keys, function(elem) {
    print(paste("Looking for element", elem))
    # use mget because it supports ifnotfound even for a single name; NULL
    # is an unambiguous "not found" marker, unlike calling is.na() on
    # whatever object was found
    next_df <- mget(elem, envir = my_env, ifnotfound = list(NULL))[[1]]
    if (is.null(next_df)) {
      # Unknown or already visited key: report the table it was found in.
      return(list(setNames(df, c("col1", "col2"))))
    }
    # use `list =`, otherwise rm tries to remove a variable called `elem`
    rm(list = elem, envir = my_env)
    # Strip one nesting level so the caller sees a flat list of tables.
    unlist(get_all_dfs_rec(next_df, my_env), recursive = FALSE)
  })
}
# Entry point: collect every table reachable from `df_start`.
#
# Builds a private environment containing a copy of each data frame found
# in the global environment, so that get_all_dfs_rec() can remove tables as
# it visits them without touching the global environment. The collected
# tables are then bound into one data frame and duplicate rows dropped.
#
# df_start  data frame whose lookup keys start the search.
get_all_dfs <- function(df_start) {
  ## create a new environment
  my_env <- new.env()
  ## and add all data frames from the global environment to it
  for (obj_name in ls(.GlobalEnv)) {
    obj <- get(obj_name, envir = .GlobalEnv)
    # is.data.frame() returns a single TRUE/FALSE even for objects with
    # several classes (e.g. tibbles); `class(obj) == "data.frame"` does not.
    if (is.data.frame(obj)) {
      assign(obj_name, obj, envir = my_env)
    }
  }
  flatten_dfr(get_all_dfs_rec(df_start, my_env)) %>% unique()
}
来源:https://stackoverflow.com/questions/54912421/how-to-prevent-infinite-loop-in-dataframe-lookup-where-elements-are-bi-direction