Hi,
I am trying to download the Advan neighborhood patterns data, follwing the instructions in the recent post: Dewey Data Bulk Download in R (new system)
The command get_file_list (defined in the linked post) appears to be working as intended, producing a table with download links. When I try to run read_sample_data or download_files, I get the following error message:
Error in make.names(col.names, unique = TRUE) :
invalid multibyte string 1
In addition: Warning message:
In read.table(file = file, header = header, sep = sep, quote = quote, :
line 1 appears to contain embedded nulls
Here is an example of code that returns the above error.
# Get the list of files on server
get_file_list = function(apikey, product_path, print_info = T) {
response = tryCatch(
{
GET(url=product_path,
add_headers(.headers = c("X-API-KEY" = apikey,
"accept" = "application/json")))
}, warning = function(cond) {
message("Warning in GET.")
message(cond)
message("")
}, error = function(cond) {
message("Error in GET.")
message(cond)
message("")
}
)
if(is.null(response)) {
return(NULL)
} else if(response$status_code == 401) {
print(response);
return(NULL);
}
res_json = content(response)
num_files = res_json$metadata$num_files
total_size_mb = res_json$metadata$total_size_mb
avg_file_size_mb = res_json$metadata$avg_file_size_mb
expires_at = res_json$metadata$expires_at
if(print_info) {
message("Files information---------------------------------------")
message(paste0("Number of files: ", num_files))
message(paste0("Total size (MB): ", total_size_mb))
message(paste0("Average file size (MB): ", avg_file_size_mb))
message(paste0("Link expires: ", expires_at))
message("--------------------------------------------------------")
}
files_df = data.frame(download_link = unlist(res_json$download_links))
split_links = do.call(rbind.data.frame,
strsplit(files_df$download_link, "?", fixed = T))
files_df$file_link = split_links[, 1];
files_df = files_df[order(files_df$file_link), ];
# Extract the file name
file_names = apply(data.frame(files_df$file), 1,
function(x) tail(unlist(strsplit(x, "/")), n= 1) );
files_df$file_name = file_names;
return(files_df);
}
# Read URL data into memory
read_sample_data = function(url, nrows = 100) {
if(nrows > 1000) {
message("Warning: set nrows no greater than 1000.");
nrows = 1000;
}
df = read.csv(file = url, nrows = nrows);
return(df);
}
api_key = (removed)
endpoint = (removed)
product_id = (removed)
files_df = get_file_list(api_key,endpoint, print_info = T)
jan2023 = filter(files_df, str_detect(file_link, '/2023-01-01/'))
sample_data = read_sample_data(jan2023$download_link[1], nrows = 100);
Appreciate any help you can provide. Thanks!
Joel