Combining two financial datasets, with interactive account balance variable over time

放肆的年华 提交于 2020-01-05 07:12:27

问题


I have a question related to a financial transactions dataset. I have two datasets:

The first one contains financial transactions with a timestamp.

   Account_from  Account_to  Value  Timestamp  
1        1            2        25       1
2        1            3        25       1
3        2            1        50       2
4        2            3        20       2
5        2            4        25       2
6        1            2        40       3
7        3            1        20       3
8        2            4        25       3

The other dataset contains account information:

   Account_id  initial deposit
1       1            200
2       2            100
3       3            150
4       4            200

Now I would like to create a dataset that contains the financial transactions together with the balance of the originating account. Furthermore, I would like the account balances to change over time with each transaction made, such that:

   Account_from  Account_to  Value  Timestamp  Initial_deposit  Old_bal_org  New_bal_org  Old_bal_des  New_bal_des
1        1            2        25       1            200            200          175         100          125
2        1            3        25       1            200            175          150         150          175
3        2            1        50       2            100            125          75          150          200
4        2            3        20       2            100            75           55          175          195
5        2            4        25       2            100            55           30          200          225
6        1            2        40       3            200            200          160         30           70
7        3            1        20       3            150            195          175         160          180
8        2            4        25       3            100            70           45          225          250

How would this be possible?

To reproduce the data:

# Transactions: each row moves `Value` from `Account_from` to `Account_to`
# at the given `Timestamp`.
dftrans <- data.frame(
  Account_from = c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L),
  Account_to = c(2L, 3L, 1L, 3L, 4L, 2L, 1L, 4L),
  Value = c(25, 25, 50, 20, 25, 40, 20, 25),
  Timestamp = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)
)

# Accounts: one row per account with its starting balance.
dfacc <- data.frame(
  Account_id = 1:4,
  Initial__deposit = c(200, 100, 150, 200)
)

Thanks in advance


回答1:


One possible way to do this:

# Packages providing %>%, the dplyr verbs, pivot_longer()/pivot_wider() and
# rowid_to_column().
library(dplyr)
library(tidyr)
library(tibble)

# Build one row per transaction carrying the origin and destination account
# balances before and after that transaction.
#
# Inputs (defined elsewhere): `dftrans` (Account_from, Account_to, Value,
# Timestamp) and `dfacc` (Account_id, Initial__deposit).
# Approach: split every transaction into a debit row and a credit row, keep a
# cumulative sum of signed amounts per account, then spread the two rows back
# into a single wide row per transaction.
dftransFinal <- dftrans %>%
  # create a record id to keep track of each transaction
  rowid_to_column(var = "recordID") %>%
  # one row per (transaction, side); `accountType` records which side it is
  pivot_longer(
    cols = c(Account_to, Account_from),
    names_to = "accountType",
    values_to = "Account_id"
  ) %>%
  left_join(dfacc, by = "Account_id") %>%
  # a 'from' row is a debit: negate it so that it is subtracted from the balance
  mutate(Value = if_else(accountType == "Account_from", -Value, Value)) %>%
  group_by(Account_id) %>%
  mutate(
    # net change of the account up to and including this row
    sum_changes = cumsum(Value),
    # net change before this row; the offset argument of dplyr::lag() is `n`
    # (`k` is the stats::lag() spelling and is not a dplyr::lag() argument)
    sum_changes_lag = lag(sum_changes, n = 1, default = 0),
    Old_bal_org = Initial__deposit + sum_changes_lag,
    New_bal_org = Initial__deposit + sum_changes
  ) %>%
  # drop the per-account grouping before reshaping; Account_id is about to be
  # spread into two columns and must not linger as a grouping variable
  ungroup() %>%
  pivot_wider(
    id_cols = c(recordID, Timestamp),
    names_from = accountType,
    values_from = c(
      Account_id, Old_bal_org, Initial__deposit, New_bal_org, Value
    )
  ) %>%
  # select, rename, and order columns; Value is taken from the credit side so
  # that it keeps its original (positive) sign
  select(
    Account_from = Account_id_Account_from,
    Account_to = Account_id_Account_to,
    Value = Value_Account_to,
    Timestamp,
    Initial__deposit = Initial__deposit_Account_from,
    Old_bal_org = Old_bal_org_Account_from,
    New_bal_org = New_bal_org_Account_from,
    Old_bal_des = Old_bal_org_Account_to,
    New_bal_des = New_bal_org_Account_to
  )



回答2:


Using data.table and a similar approach to Amanda's of keeping a running ledger:

# Build a running ledger with two rows per transaction (debit on the origin
# account, credit on the destination account), then accumulate per account.
#
# Requires `dftrans` to be a data.table that already has the transaction
# number column `rn`, and `dfacc` to be a data.table.
# Result columns: Account_id, rn, DebitCredit (cumulative net change of the
# account) and Balance (initial deposit + DebitCredit).
ledger <- dftrans[, .(
  rn = rep(rn, each = 2L),
  # interleave origin/destination so row order follows transaction order
  Account_id = c(rbind(Account_from, Account_to)),
  # matching interleave of signed amounts: -Value for origin, +Value for
  # destination (vectorised; avoids the type-unstable sapply())
  Value = c(rbind(-Value, Value))
)][
  ,
  .(rn, DebitCredit = cumsum(Value)),
  by = .(Account_id)
][
  # update join: `i.` marks the column taken from dfacc
  dfacc,
  on = .(Account_id),
  Balance := i.Initial__deposit + DebitCredit
]

ledger:

    Account_id rn DebitCredit Balance
 1:          1  1         -25     175
 2:          1  2         -50     150
 3:          1  3           0     200
 4:          1  6         -40     160
 5:          1  7         -20     180
 6:          2  1          25     125
 7:          2  3         -25      75
 8:          2  4         -45      55
 9:          2  5         -70      30
10:          2  6         -30      70
11:          2  8         -55      45
12:          3  2          25     175
13:          3  4          45     195
14:          3  7          25     175
15:          4  5          25     225
16:          4  8          50     250

Then using rolling and non-equi joins to look up desired columns:

# Add the four balance columns to dftrans by reference (`:=`), looking each
# value up in `ledger` by account and transaction number `rn`.
# NOTE(review): relies on `ledger` and on the `rn`, `init_from` and `init_to`
# columns of dftrans, all of which are created outside this snippet.

# Origin balance before the transaction: among the origin account's ledger
# rows with a smaller rn (non-equi condition rn<rn) take the last match's
# Balance; where no earlier row exists the lookup is NA and fcoalesce() falls
# back to init_from.
dftrans[, Old_bal_org := fcoalesce(
    ledger[.SD, on=.(Account_id=Account_from, rn<rn), mult="last", Balance],
    init_from)]

# Origin balance after the transaction: join on origin account and rn
# (roll=Inf carries the preceding ledger row forward if rn has no exact
# match) and add the account's cumulative DebitCredit to init_from.
dftrans[, New_bal_org := 
    ledger[.SD, on=.(Account_id=Account_from, rn), roll=Inf, init_from + DebitCredit]
]

# Destination balance before the transaction: same lookup as Old_bal_org, but
# keyed on Account_to and falling back to init_to.
dftrans[, Old_bal_des := fcoalesce(
    ledger[.SD, on=.(Account_id=Account_to, rn<rn), mult="last", Balance],
    init_to)]

# Destination balance after the transaction: same lookup as New_bal_org, but
# keyed on Account_to and based on init_to.
dftrans[, New_bal_des := 
    ledger[.SD, on=.(Account_id=Account_to, rn), roll=Inf, init_to + DebitCredit]
]

output:

   Account_from Account_to Value Timestamp rn init_from init_to Old_bal_org New_bal_org Old_bal_des New_bal_des
1:            1          2    25         1  1       200     100         200         175         100         125
2:            1          3    25         1  2       200     150         175         150         150         175
3:            2          1    50         2  3       100     200         125          75         150         200
4:            2          3    20         2  4       100     150          75          55         175         195
5:            2          4    25         2  5       100     200          55          30         200         225
6:            1          2    40         3  6       200     100         200         160          30          70
7:            3          1    20         3  7       150     200         195         175         160         180
8:            2          4    25         3  8       100     200          70          45         225         250

Data and initial-deposit lookup (run this first, before the ledger and join steps above):

# Transactions: each row moves `Value` from `Account_from` to `Account_to`
# at the given `Timestamp`.
dftrans <- data.frame(
  Account_from = c(1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L),
  Account_to = c(2L, 3L, 1L, 3L, 4L, 2L, 1L, 4L),
  Value = c(25, 25, 50, 20, 25, 40, 20, 25),
  Timestamp = c(1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L)
)

# Accounts: one row per account with its starting balance.
dfacc <- data.frame(
  Account_id = 1:4,
  Initial__deposit = c(200, 100, 150, 200)
)

library(data.table)

# Convert both data frames to data.tables in place.
setDT(dfacc)
setDT(dftrans)

# Number the transactions in their current row order.
dftrans[, rn := .I]

# Attach each side's initial deposit via update joins against dfacc
# (`i.` marks the column coming from dfacc).
dftrans[
  dfacc,
  on = .(Account_from = Account_id),
  init_from := i.Initial__deposit
]
dftrans[
  dfacc,
  on = .(Account_to = Account_id),
  init_to := i.Initial__deposit
]


来源:https://stackoverflow.com/questions/59561897/combining-two-financial-datasets-with-interactive-account-balance-variable-over

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!