Use R to Download an individual shared file from a *Shared* Google Drive directory

房东的猫 提交于 2019-12-12 07:04:40

问题


My intent was to download a file in R from a shared Google Drive directory, i.e. a directory that I do not own but merely have access to. This turned out to be more complicated than I had expected.

Specifically, I wanted to put the shared file's URL into an R script so that the data file can be downloaded from the other user's directory and then manipulated in R.

Note: Surprisingly, I was able to solve my own problem, thus it felt only fair to share both the problem and solution. :-)

Problem:

The file(s) in the code below download but are corrupted in the process. What am I missing? All help gratefully received.

Code Example

# Names of the packages that are handed to ipak() further down.
requiredPackages <- c(
  "rio", "plm", "splm", "tmaptools",
  "spdep", "fields", "readxl"
)

# Install any packages that are missing, then attach all of them.
#
# Args:
#   pkg: character vector of package names.
#
# Returns:
#   A named logical vector with one element per entry of `pkg`; each value is
#   the result of require() for that package (TRUE if it could be attached).
ipak <- function(pkg) {
  # Only packages not yet present in the library need installing.
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # require() is used on purpose: its TRUE/FALSE result is the per-package
  # status reported to the caller. vapply() guarantees a logical vector even
  # for empty input, where sapply() would return a list.
  vapply(pkg, require, logical(1), character.only = TRUE)
}

# Install (if necessary) and attach everything listed in requiredPackages.
ipak(requiredPackages)

## Share links for the two files, in their "open?id=..." form.
## Downloading these links is what yields the corrupted files described above.
pdataURL <- "https://drive.google.com/open?id=1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn" # data1.xlsx
sdataURL <- "https://drive.google.com/open?id=1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW" # nuts2a.shp

# Local paths the downloads are written to.
# NOTE(review): "nut2sa.shp" differs from the "nuts2a.shp" name in the URL
# comment above; probably a typo, but it is used consistently below.
pdataDest <- "./data/data1.xlsx"
sdataDest <- "./data/nut2sa.shp"

# Fetch both files in binary mode using the external wget program.
download.file(
  url = pdataURL,
  destfile = pdataDest,
  method = "wget",
  mode = "wb"
)
download.file(
  url = sdataURL,
  destfile = sdataDest,
  method = "wget",
  mode = "wb"
)

# Read the downloaded files into R for further manipulation.
pdata <- read_excel(pdataDest)
shape_nuts <- read_shape(sdataDest)

Screen Image


回答1:


Solution

To do this you need to:

  1. Obtain the URL from the GoogleShare directory
  2. Replace "open?" in the URL with "uc?export=download&"
  3. Download the data files... :-)

Code Example Solution

# Names of the packages that are handed to ipak() further down.
requiredPackages <- c(
  "rio", "plm", "splm", "tmaptools",
  "spdep", "fields", "readxl"
)

# Install any packages that are missing, then attach all of them.
#
# Args:
#   pkg: character vector of package names.
#
# Returns:
#   A named logical vector with one element per entry of `pkg`; each value is
#   the result of require() for that package (TRUE if it could be attached).
ipak <- function(pkg) {
  # Only packages not yet present in the library need installing.
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # require() is used on purpose: its TRUE/FALSE result is the per-package
  # status reported to the caller. vapply() guarantees a logical vector even
  # for empty input, where sapply() would return a list.
  vapply(pkg, require, logical(1), character.only = TRUE)
}

# Install (if necessary) and attach everything listed in requiredPackages.
ipak(requiredPackages)

# URLs obtained from the Google Drive shared directory ("open?id=..." form).
pShareDataURL <- "https://drive.google.com/open?id=1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn"  ## data1.xlsx
sShareDataURL <- "https://drive.google.com/open?id=1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW"  ## nuts2a.shp

## Solution:
##
## Replace "open?" in each share link with "uc?export=download&".
## fixed = TRUE makes both the pattern and the replacement literal text, so
## neither "?" nor "&" needs to be escaped.
##
pdataURL <- gsub("open?", "uc?export=download&", pShareDataURL, fixed = TRUE)
sdataURL <- gsub("open?", "uc?export=download&", sShareDataURL, fixed = TRUE)

# The resulting URLs, for reference:
# pdataURL = "https://drive.google.com/uc?export=download&id=1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn"
# sdataURL = "https://drive.google.com/uc?export=download&id=1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW"

# Local destinations. The directory is created up front because
# download.file() cannot write into a directory that does not exist.
dataDir <- "./data"
dir.create(dataDir, showWarnings = FALSE, recursive = TRUE)
pdataDest <- file.path(dataDir, "data1.xlsx")
sdataDest <- file.path(dataDir, "nuts2a.shp")

# Fetch both files in binary mode; method = "wget" needs the external wget
# program to be installed and on the PATH.
download.file(pdataURL, destfile = pdataDest, method = "wget", mode = "wb")
download.file(sdataURL, destfile = sdataDest, method = "wget", mode = "wb")

# Read the downloaded files into R.
# NOTE(review): read_shape() is expected to come from tmaptools; confirm that
# the installed tmaptools version still provides it.
pdata <- read_excel(pdataDest)
shape_nuts <- read_shape(sdataDest)

Console Output:

> requiredPackages <-
+   c("rio", "plm", "splm", "tmaptools", "spdep", "fields", 
+     "readxl")
> 
> ipak <- function(pkg) {
+   new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
+   if (length(new.pkg))
+     install.packages(new.pkg, dependencies = TRUE)
+   sapply(pkg, require, character.only = TRUE)
+ }
> 
> ipak(requiredPackages)
      rio       plm      splm tmaptools     spdep    fields    readxl 
     TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE 
> 
> # URL's obtained from Google Shared Directory
> pShareDataURL = "https://drive.google.com/open?id=1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn"  ## data1.xlsx
> sShareDataURL = "https://drive.google.com/open?id=1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW"  ## nuts2ashp
> 
> ## Solution:
> ## 
> ## strip "open?" and replace with us?export=download&
> ## 
> 
> pdataURL <- gsub("open\\?", "uc\\?export=download\\&", pShareDataURL )
> sdataURL <- gsub("open\\?", "uc\\?export=download\\&", sShareDataURL )
> 
> #pdataURL = "https://drive.google.com/uc?export=download&id=1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn"
> #sdataURL = "https://drive.google.com/uc?export=download&id=1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW"
> 
> pdataDest = file.path("./data/data1.xlsx" )
> sdataDest = file.path("./data/nuts2a.shp" )
> 
> download.file(pdataURL, destfile = pdataDest, method = "wget", mode = "wb")
--2018-05-25 09:13:44--  https://drive.google.com/uc?export=download&id=1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn
Resolving drive.google.com (drive.google.com)... 216.58.217.46
Connecting to drive.google.com (drive.google.com)|216.58.217.46|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0o-5o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/l9l497j1dj00652dq2gj2bupd0528a6k/1527264000000/13432863832188906115/*/1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn?e=download [following]
Warning: wildcards not supported in HTTP.
--2018-05-25 09:13:45--  https://doc-0o-5o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/l9l497j1dj00652dq2gj2bupd0528a6k/1527264000000/13432863832188906115/*/1PjDlLiA99-3xuGPhPLltRg8uod6zPAKn?e=download
Resolving doc-0o-5o-docs.googleusercontent.com (doc-0o-5o-docs.googleusercontent.com)... 216.58.217.33
Connecting to doc-0o-5o-docs.googleusercontent.com (doc-0o-5o-docs.googleusercontent.com)|216.58.217.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 700916 (684K) [application/vnd.openxmlformats-officedocument.spreadsheetml.sheet]
Saving to: ‘./data/data1.xlsx’

     0K .......... .......... .......... .......... ..........  7% 7.61M 0s
    50K .......... .......... .......... .......... .......... 14% 6.23M 0s
   100K .......... .......... .......... .......... .......... 21% 24.8M 0s
   150K .......... .......... .......... .......... .......... 29% 24.1M 0s
   200K .......... .......... .......... .......... .......... 36% 25.2M 0s
   250K .......... .......... .......... .......... .......... 43% 25.9M 0s
   300K .......... .......... .......... .......... .......... 51% 31.1M 0s
   350K .......... .......... .......... .......... .......... 58% 35.3M 0s
   400K .......... .......... .......... .......... .......... 65% 34.1M 0s
   450K .......... .......... .......... .......... .......... 73% 25.5M 0s
   500K .......... .......... .......... .......... .......... 80% 29.5M 0s
   550K .......... .......... .......... .......... .......... 87% 29.9M 0s
   600K .......... .......... .......... .......... .......... 94% 36.0M 0s
   650K .......... .......... .......... ....                 100% 19.9M=0.03s

2018-05-25 09:13:45 (19.3 MB/s) - ‘./data/data1.xlsx’ saved [700916/700916]

> download.file(sdataURL, destfile = sdataDest, method = "wget", mode = "wb")
--2018-05-25 09:13:45--  https://drive.google.com/uc?export=download&id=1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW
Resolving drive.google.com (drive.google.com)... 216.58.217.46
Connecting to drive.google.com (drive.google.com)|216.58.217.46|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0s-5o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/ngvfs925hhqq94mq7jmm5avpetg9tkcg/1527264000000/13432863832188906115/*/1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW?e=download [following]
Warning: wildcards not supported in HTTP.
--2018-05-25 09:13:46--  https://doc-0s-5o-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/ngvfs925hhqq94mq7jmm5avpetg9tkcg/1527264000000/13432863832188906115/*/1VJGL8aSJomvWCnw9FPEWTJsQ65StYdzW?e=download
Resolving doc-0s-5o-docs.googleusercontent.com (doc-0s-5o-docs.googleusercontent.com)... 216.58.217.33
Connecting to doc-0s-5o-docs.googleusercontent.com (doc-0s-5o-docs.googleusercontent.com)|216.58.217.33|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/octet-stream]
Saving to: ‘./data/nuts2a.shp’

     0K .......... .......... .......... .......... .......... 15.0M
    50K .......... .......... .......... .......... .......... 30.4M
   100K .......... .......... .......... .......... .......... 27.9M
   150K .......... .......... .......... .......... .......... 25.4M
   200K .......... .......... .......... .......... .......... 29.4M
   250K .......... .......... .......... .......... .......... 38.6M
   300K .......... .......... .......... .......... .......... 40.0M
   350K .......... .......... .......... .......... .......... 30.0M
   400K .......... .......... .......... .......... .......... 31.7M
   450K .......... .......... .......... .......... .......... 24.3M
   500K .......... .......... .......... .......... .......... 27.4M
   550K .......... .......... .......... .......... .......... 32.7M
   600K .......... .......... .......... .......... .......... 46.5M
   650K .......... .......... .......... .......... .......... 44.3M
   700K .......... .......... .......... .......... .......... 52.3M
   750K .......... .......... .......... .......... .......... 27.7M
   800K .......... .......... .......... .......... .......... 47.3M
   850K .......... .......... .......... .......... .......... 43.1M
   900K .......... .......... .......... .......... .......... 38.1M
   950K .......... .......... .......... .......... .......... 36.7M
  1000K .......... .......... .......... .......... .......... 47.5M
  1050K .......... .......... .......... .......... ..........  148M
  1100K .......... .......... .......... .......... .......... 43.3M
  1150K .......... .......... .......... .......... .......... 79.4M
  1200K .......... .......... .......... .......... .......... 48.3M
  1250K .......... .......... .......... .......... .......... 49.4M
  1300K .......... .......... .......... .......... ..........  105M
  1350K .......... .......... .......... .......... .......... 48.5M
  1400K .......... .......... .......... .......... .......... 82.3M
  1450K .......... .......... .......... .......... .......... 39.1M
  1500K .......... .......... .......... .......... .......... 29.0M
  1550K .......... .......... .......... .......... .......... 90.6M
  1600K .......... .......... .......... .......... .......... 86.0M
  1650K .......... .......... .......... .......... .......... 89.9M
  1700K .......... .......... .......... .......... .......... 59.1M
  1750K .......... .......... .......... .......... .......... 56.9M
  1800K .......... .......... .......... .......... ..........  102M
  1850K .......... .......... .......... .......... .......... 57.6M
  1900K .......... .......... .......... .......... .......... 61.6M
  1950K .......... .......... .......... .......... .......... 67.3M
  2000K .......... .......... .......... .......... .......... 60.9M
  2050K .......... .......... .......... .......... .......... 33.8M
  2100K .......... .......... .......... .......... .......... 82.9M
  2150K .......... .......... .......... .......... ..........  126M
  2200K .......... .......... .......... .......... .......... 66.2M
  2250K .......... .......... .......... .......... ..........  137M
  2300K .......... .......... .......... .......... ..........  101M
  2350K .......... .......... .......... .......... .......... 93.7M
  2400K .......... .......... .......... .......... .......... 85.4M
  2450K .......... .......... .......... .......... ..........  106M
  2500K .......... .......... .......... .......... .......... 92.0M
  2550K .......... .......... .......... .......... ..........  131M
  2600K .......... .......... .......... .......... .......... 77.5M
  2650K .......... .......... .......... .......... .......... 75.0M
  2700K .......... .......... .......... .......... .......... 36.3M
  2750K .......... .......... .......... .......... ..........  136M
  2800K .......... .......... .......... .......... .......... 87.2M
  2850K .......... .......... .......... .......... .......... 88.8M
  2900K .......... .......... .......... .......... .......... 81.2M
  2950K .......... .......... .......... .......... ..........  127M
  3000K .......... .......... .......... .......... .......... 92.3M
  3050K .......... .......... .......... .......... .......... 49.6M
  3100K .......... .......... .......... .......... .......... 80.4M
  3150K .......... .......... .......... .......... ..........  108M
  3200K .......... .......... .......... .......... ..........  115M
  3250K .......... .......... .......... .......... ..........  124M
  3300K .......... .......... .......... .......... ..........  128M
  3350K .......... .......... .......... .......... .......... 88.6M
  3400K .......... .......... .......... .......... .......... 51.7M
  3450K .......... .......... .......... .......... ..........  174M
  3500K .......... .......... .......... .......... ..........  128M
  3550K .......... .......... .......... .......... ..........  125M
  3600K ........                                                190M=0.07s

2018-05-25 09:13:46 (53.6 MB/s) - ‘./data/nuts2a.shp’ saved [3695560]

> 
> pdata <- read_excel(pdataDest)
> shape_nuts <- read_shape(sdataDest)
> 


来源:https://stackoverflow.com/questions/50532206/use-r-to-download-an-individual-shared-file-from-a-shared-google-drive-directo

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!