UPDATE: The latest Python tutorial can be found here: Bulk Data Downloading in Python (API v3)
Hello, Dewey community. I am back to explore the Dewey datasets, this time with Python. I thought I had based my earlier R example on a Python example, but it seems I had not. So here is a Python version for downloading files in bulk from Dewey. I hope this helps Python users.
import getpass
import json
import pandas as pd
import requests
# Base URL of the Dewey marketplace. The example below prepends it to the
# relative "url" values found in the file listing.
DEWEY_MP_ROOT = "https://marketplace.deweydata.io"
# Endpoint that exchanges a user name / password for an access token.
DEWEY_TOKEN_URL = DEWEY_MP_ROOT + "/api/auth/tks/get_token"
# Endpoint (API v2) that lists the files under a given sub-path.
DEWEY_DATA_ROOT = DEWEY_MP_ROOT + "/api/data/v2/list"
def get_access_token(username, passw, timeout=200):
    """Request an access token from Dewey using HTTP basic authentication.

    Args:
        username: Dewey account user name (email address).
        passw: Dewey account password.
        timeout: Seconds to wait for the server to respond before giving up.

    Returns:
        The value of the "access_token" field of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status,
            e.g. because the credentials were rejected.
        requests.Timeout: If the server does not respond within `timeout`.
        KeyError: If the JSON response has no "access_token" field.
    """
    response = requests.post(
        url=DEWEY_TOKEN_URL,
        data={},
        auth=(username, passw),
        timeout=timeout,
    )
    # Fail with a clear HTTP error instead of a confusing KeyError/JSON error
    # when the server rejects the request.
    response.raise_for_status()
    return response.json()["access_token"]
def get_file_paths(token, sub_path="", timeout=200):
    """List the files available under `sub_path` on Dewey.

    Args:
        token: Access token, sent as a "Bearer" Authorization header.
        sub_path: Path appended to the listing endpoint, starting with "/"
            (e.g. "/2018/01/01/SAFEGRAPH/MPSP"). Empty string lists the root.
        timeout: Seconds to wait for the server to respond before giving up.

    Returns:
        A pandas DataFrame built from the JSON listing returned by the server.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status,
            e.g. an expired token or an unknown sub-path.
        requests.Timeout: If the server does not respond within `timeout`.
    """
    response = requests.get(
        DEWEY_DATA_ROOT + sub_path,
        headers={"Authorization": "Bearer " + token},
        timeout=timeout,
    )
    # Without this check an error payload would be turned into a DataFrame
    # and only fail later, far from the actual cause.
    response.raise_for_status()
    return pd.DataFrame.from_dict(response.json())
def download_file(token, src_url, dest_file, timeout=200):
    """Download a single file from Dewey to a local destination file.

    The response is streamed to disk in chunks, so large files are not held
    in memory all at once.

    Args:
        token: Access token, sent as a "Bearer" Authorization header.
        src_url: Full URL of the file to download.
        dest_file: Local path to write to; an existing file is overwritten.
        timeout: Seconds passed to requests as its timeout. This limits how
            long to wait for the server to connect or send data, not the
            total duration of the download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Nothing is written to `dest_file` in that case.
        requests.Timeout: If the server stops responding for `timeout` seconds.
        OSError: If `dest_file` cannot be opened or written.
    """
    with requests.get(
        src_url,
        headers={"Authorization": "Bearer " + token},
        timeout=timeout,
        stream=True,
    ) as response:
        # Check the status before touching the disk so an HTTP error body is
        # never saved as if it were the requested data.
        response.raise_for_status()
        with open(dest_file, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                out_file.write(chunk)
# Example ----------------------------------------------------------
# Prompt for the credentials at run time rather than storing them in the code.
# You can also hard-code your credentials here, but that is not recommended.
# Note: getpass hides the typed input, so the user name is not echoed either.
user_name = getpass.getpass("User name (email address)")
pass_word = getpass.getpass("Password")
# Exchange the credentials for an access token.
tkn = get_access_token(user_name, pass_word)
# Bare expression: a notebook/REPL displays the token here; in a plain script
# this line has no effect. The token is a secret, so avoid sharing the output.
tkn
# Get file paths in the "/2018/01/01/SAFEGRAPH/MPSP" sub folder.
file_paths = get_file_paths(token=tkn, sub_path="/2018/01/01/SAFEGRAPH/MPSP")
# Show the first 5 paths (displayed in a notebook/REPL only; use print() in a script).
file_paths.head(5)
# Download the first file to C:/temp/, as an example.
# In the file_paths DataFrame,
# file_paths.loc[0, "url"] looks like:
#   '/api/data/v2/data/2018/01/01/SAFEGRAPH/MPSP/20180101-safegraph_mpsp_visit_panel_0'
# and file_paths.loc[0, "name"] looks like:
#   'visit_panel_summary.csv'
# The "url" value is relative, so it is prefixed with the marketplace root.
src_url = DEWEY_MP_ROOT + file_paths.loc[0, "url"]
# The destination folder is used as-is, so "C:/temp/" should already exist.
dest_file = "C:/temp/" + file_paths.loc[0, "name"]
download_file(tkn, src_url, dest_file)
# Done! Check out your "C:/temp/"
# ------------------------------------------------------------------
To download multiple files, loop over the rows of file_paths and call download_file for each one.
Thank you!
Donn