💨

GoogleDriveのxlsxファイルをローカルサーバから取ってくる(python)

2022/02/11に公開

googleapiを使って、スプレッドシートファイルのデータを取ってくるのとxlsxファイルのデータを取ってくるのは話が大きく変わってきます。意外とどこにも載ってないのでここでまとめています。
2022/02/11時点 役立ったらいいね下さい

from __future__ import print_function

import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# If modifying these scopes, delete the file token.json.
SCOPES = ['https://www.googleapis.com/auth/drive']

def main():
    """Shows basic usage of the Sheets API.
    Prints values from a sample spreadsheet.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                './credentials_desktop.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
if __name__ == '__main__':
    main()
from googleapiclient.http import MediaIoBaseDownload
import io
import pandas as pd

creds = Credentials.from_authorized_user_file('token.json', SCOPES)
service = build('drive', 'v3', credentials=creds)
file_id = '####'

request = service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print ("Download %d%%." % int(status.progress() * 100))
data = pd.read_excel(fh, usecols=None, sheet_name=None)

取れました

{'シート1':    1.0  2.0  2.0.1
 0  2.0   aa    3.0
 1  3.0  4.0    4.0
 2  4.0  5.0    5.0
 3  4.0  5.0    5.0}

Discussion