admin 管理员组

文章数量: 1184232

I have a dataset that has the weighted mean and weighted standard error of many variables.

input.ds.wt = tibble(
  id = c(1,2,3,4,5,6),
  wt.mean_vOne = c(1, 1, 1.3, 2.3, 1, 0),
  wt.mean_vTwo = rep(c(0.8,0.2), 3),
  wt.SE_vOne = c(0.1,0.01,0.2,0.02,0.3,0.03),
  wt.SE_vTwo = c(0.03,0.3,0.01,0.1,0.4,0.04)
)

> input.ds.wt
# A tibble: 6 x 5
     id wt.mean_vOne wt.mean_vTwo wt.SE_vOne wt.SE_vTwo
  <dbl>        <dbl>        <dbl>      <dbl>      <dbl>
1     1          1            0.8       0.1        0.03
2     2          1            0.2       0.01       0.3 
3     3          1.3          0.8       0.2        0.01
4     4          2.3          0.2       0.02       0.1 
5     5          1            0.8       0.3        0.4 
6     6          0            0.2       0.03       0.04

I think that using some combination of pivot_longer() followed by pivot_wider() will help me calculate RSE_vOne, RSE_vTwo, when I have many more variables than vOne, vTwo. However, I am open to any flexible solution.

What I want is to calculate:

RSE_vOne = wt.SE_vOne/wt.mean_vOne
RSE_vTwo  = wt.SE_vTwo/wt.mean_vTwo
...
RSE_vN = wt.SE_vN/wt.mean_vN

for each row of input.ds.wt, but without specifying the defintion for each vOne, vTwo, ..., etc. In this dataset every vOne, vTwo, ..., vN has a corresponding wt.mean_vN and wt.SE_vN.

I have a dataset that has the weighted mean and weighted standard error of many variables.

input.ds.wt = tibble(
  id = c(1,2,3,4,5,6),
  wt.mean_vOne = c(1, 1, 1.3, 2.3, 1, 0),
  wt.mean_vTwo = rep(c(0.8,0.2), 3),
  wt.SE_vOne = c(0.1,0.01,0.2,0.02,0.3,0.03),
  wt.SE_vTwo = c(0.03,0.3,0.01,0.1,0.4,0.04)
)

> input.ds.wt
# A tibble: 6 x 5
     id wt.mean_vOne wt.mean_vTwo wt.SE_vOne wt.SE_vTwo
  <dbl>        <dbl>        <dbl>      <dbl>      <dbl>
1     1          1            0.8       0.1        0.03
2     2          1            0.2       0.01       0.3 
3     3          1.3          0.8       0.2        0.01
4     4          2.3          0.2       0.02       0.1 
5     5          1            0.8       0.3        0.4 
6     6          0            0.2       0.03       0.04

I think that using some combination of pivot_longer() followed by pivot_wider() will help me calculate RSE_vOne, RSE_vTwo, when I have many more variables than vOne, vTwo. However, I am open to any flexible solution.

What I want is to calculate:

RSE_vOne = wt.SE_vOne/wt.mean_vOne
RSE_vTwo  = wt.SE_vTwo/wt.mean_vTwo
...
RSE_vN = wt.SE_vN/wt.mean_vN

for each row of input.ds.wt, but without specifying the defintion for each vOne, vTwo, ..., etc. In this dataset every vOne, vTwo, ..., vN has a corresponding wt.mean_vN and wt.SE_vN.

Share Improve this question edited Feb 3 at 19:59 abrar asked Feb 3 at 10:01 abrarabrar 1351 silver badge11 bronze badges
Add a comment  | 

3 Answers 3

Reset to default 1

Update (with code breakdown)

As requested by the OP, it might be better to have break down the code to steps and show how it works

  • Step 1
> (s1 <- input.ds.wt %>%
+     select(!id))
# A tibble: 6 × 4
  wt.mean_vOne wt.mean_vTwo wt.SE_vOne wt.SE_vTwo
         <dbl>        <dbl>      <dbl>      <dbl>
1          1            0.8       0.1        0.03
2          1            0.2       0.01       0.3
3          1.3          0.8       0.2        0.01
4          2.3          0.2       0.02       0.1
5          1            0.8       0.3        0.4
6          0            0.2       0.03       0.04
  • Step 2
> (s2 <- str_c("RSE_", sub(".*_", "", names(s1))))
[1] "RSE_vOne" "RSE_vTwo" "RSE_vOne" "RSE_vTwo"
  • Step 3
> (s3 <- split.default(s1, s2))
$RSE_vOne
# A tibble: 6 × 2
  wt.mean_vOne wt.SE_vOne
         <dbl>      <dbl>
1          1         0.1
2          1         0.01
3          1.3       0.2
4          2.3       0.02
5          1         0.3
6          0         0.03

$RSE_vTwo
# A tibble: 6 × 2
  wt.mean_vTwo wt.SE_vTwo
         <dbl>      <dbl>
1          0.8       0.03
2          0.2       0.3
3          0.8       0.01
4          0.2       0.1
5          0.8       0.4
6          0.2       0.04
  • Step 4
> (s4 <- map_dfc(s3, \(x) x[[2]] / x[[1]]))
# A tibble: 6 × 2
   RSE_vOne RSE_vTwo
      <dbl>    <dbl>
1   0.1       0.0375
2   0.01      1.5
3   0.154     0.0125
4   0.00870   0.5
5   0.3       0.5
6 Inf         0.2
  • Step 5
> input.ds.wt %>%
+     cbind(s4)
  id wt.mean_vOne wt.mean_vTwo wt.SE_vOne wt.SE_vTwo    RSE_vOne RSE_vTwo
1  1          1.0          0.8       0.10       0.03 0.100000000   0.0375
2  2          1.0          0.2       0.01       0.30 0.010000000   1.5000
3  3          1.3          0.8       0.20       0.01 0.153846154   0.0125
4  4          2.3          0.2       0.02       0.10 0.008695652   0.5000
5  5          1.0          0.8       0.30       0.40 0.300000000   0.5000
6  6          0.0          0.2       0.03       0.04         Inf   0.2000

I don't think you need double pivots, but splitting and collapsing the paired columns should be enough.

Probably you can try this

library(dplyr)

input.ds.wt %>%
    cbind({
        .
    } %>%
        select(!id) %>%
        split.default(str_c("RSE_", sub(".*_", "", names(.)))) %>%
        map_dfc(\(x) x[[2]] / x[[1]]))

where

  • . in {.} is the placeholder for input.ds.wt
  • sub(".*_","",names(.)) replace the substring that matches the pattern ".*_", e.g., "wt.mean_" or "wt.SE_", with empty ""
  • split.default splits the dataframe by columns, according to the above grouping patterns in terms of column names
  • x in map_dfc denotes each paired columns, e.g., wt.mean_vOne (x[[1]]) and wt.SE_vOne (x[[2]])

and you will obtain

  id wt.mean_vOne wt.mean_vTwo wt.SE_vOne wt.SE_vTwo    RSE_vOne RSE_vTwo
1  1          1.0          0.8       0.10       0.03 0.100000000   0.0375
2  2          1.0          0.2       0.01       0.30 0.010000000   1.5000
3  3          1.3          0.8       0.20       0.01 0.153846154   0.0125
4  4          2.3          0.2       0.02       0.10 0.008695652   0.5000
5  5          1.0          0.8       0.30       0.40 0.300000000   0.5000
6  6          0.0          0.2       0.03       0.04         Inf   0.2000

If they are not sorted you can use:

wt.SE = input.ds.wt %>% select(sort(starts_with("wt.SE")))
wt.mean = input.ds.wt %>% select(sort(starts_with("wt.mean")))

You don't need a pivot at all. You could use dplyr select:

wt.SE = input.ds.wt %>% select(starts_with("wt.SE"))
wt.mean = input.ds.wt %>% select(starts_with("wt.mean"))
rse = wt.SE / wt.mean

This solution assumes that the wt.SE and the wt.mean variables appear in the same order. If the variables wt.SE and wt.mean are not in the same order, you can use instead:

wt.SE = input.ds.wt %>% select(sort(starts_with("wt.SE")))
wt.mean = input.ds.wt %>% select(sort(starts_with("wt.mean")))
rse = wt.SE / wt.mean

本文标签:

Error[2]: Invalid argument supplied for foreach(), File: /www/wwwroot/roclinux.cn/tmp/view_template_quzhiwa_htm_read.htm, Line: 58
File: /www/wwwroot/roclinux.cn/tmp/route_read.php, Line: 205, include(/www/wwwroot/roclinux.cn/tmp/view_template_quzhiwa_htm_read.htm)
File: /www/wwwroot/roclinux.cn/tmp/index.inc.php, Line: 129, include(/www/wwwroot/roclinux.cn/tmp/route_read.php)
File: /www/wwwroot/roclinux.cn/index.php, Line: 29, include(/www/wwwroot/roclinux.cn/tmp/index.inc.php)