Download a csv file from gmail using python

后端 未结 4 1181
無奈伤痛
無奈伤痛 2020-12-17 06:40

I tried several different Python scripts to download a CSV attachment from Gmail, but I was not able to get any of them to work. Is this possible? If it is, which Python script should I use?

相关标签:
4条回答
  • 2020-12-17 07:14

    TL;DR

    • I've put together a Github repo that makes getting CSV data from Gmail as simple as:

      # Import only the two helpers this example uses, instead of a star import.
      from gmail import get_gmail_service, query_for_csv_attachments

      # Build the Gmail service once and reuse it for the query below.
      service = get_gmail_service()

      # get all attachments from e-mails containing 'test'
      search_query = "test"
      csv_dfs = query_for_csv_attachments(service, search_query)
      print(csv_dfs)
      
    • Follow the instructions in the README and feel free to contribute!

    THE LONG ANSWER (directly using google-api-python-client and oauth2client)

    • Follow this link and click the button: "ENABLE THE GMAIL API". After the setup you will download a file called credentials.json.
    • Install the needed Python packages:

      pip install --upgrade google-api-python-client oauth2client
      
    • The following code will allow you to connect to your Gmail account via Python:

      from googleapiclient.discovery import build
      from httplib2 import Http
      from oauth2client import file, client, tools

      GMAIL_CREDENTIALS_PATH = 'credentials.json' # downloaded
      GMAIL_TOKEN_PATH = 'token.json' # this will be created
      # OAuth scope requested by the flow below. Read-only access is enough for
      # listing messages and fetching their attachments.
      SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'

      # Reuse the stored token when it is present and valid; otherwise run the
      # OAuth flow, which saves the new token into the same store.
      store = file.Storage(GMAIL_TOKEN_PATH)
      creds = store.get()
      if not creds or creds.invalid:
          flow = client.flow_from_clientsecrets(GMAIL_CREDENTIALS_PATH, SCOPES)
          creds = tools.run_flow(flow, store)
      service = build('gmail', 'v1', http=creds.authorize(Http()))
      
    • With this service you can read your emails and any attachments.

    • First, query your e-mails with a search string to find the IDs of the e-mails that have the attachments:

      search_query = "ABCD"
      # Ask Gmail for the messages matching the search string.
      results = service.users().messages().list(userId='me', q=search_query).execute()
      # The 'messages' key may be absent when nothing matches, so default to
      # an empty list instead of raising KeyError.
      msgs = results.get('messages', [])
      msg_ids = [msg['id'] for msg in msgs]
      
    • For each messageId you can find the associated attachments in the email.

    • This part is a little messy so bear with me. First we obtain a list of "attachment parts" (and attachment filenames). These are components of the email that contain attachments:

      messageId = 'XYZ'
      msg = service.users().messages().get(userId='me', id=messageId).execute()
      # A message whose payload has no 'parts' entry yields an empty list here
      # rather than None, so the loop below is simply skipped.
      parts = (msg.get('payload') or {}).get('parts') or []

      # Flatten the part tree to its leaves. Containers can be nested more
      # than one level deep, so walk with an explicit stack; pushing children
      # in reverse keeps the leaves in their original order.
      all_parts = []
      pending = list(reversed(parts))
      while pending:
          p = pending.pop()
          children = p.get('parts')
          if children:
              pending.extend(reversed(children))
          else:
              all_parts.append(p)

      # Keep only the CSV parts and remember their filenames.
      att_parts = [p for p in all_parts if p.get('mimeType') == 'text/csv']
      filenames = [p['filename'] for p in att_parts]
      
    • Now we can obtain the attached CSV from each part:

      # Obtain the base64-encoded content of a single attachment part.
      # NOTE(review): `part` is not defined in this fragment; presumably it is
      # one entry of the attachment-part list built in the previous step, so
      # run this inside a loop over that list to handle every CSV.
      messageId = 'XYZ'
      # Use the content carried directly in the part body when there is any.
      data = part['body'].get('data')
      attachmentId = part['body'].get('attachmentId')
      if not data:
          # No inline data: fetch the attachment separately by its id.
          att = service.users().messages().attachments().get(
                  userId='me', id=attachmentId, messageId=messageId).execute()
          data = att['data']
      
    • Now you have the CSV data but it's in an encoded format, so we change the encoding and convert the result into a Pandas dataframe:

      import base64
      import pandas as pd
      # io.BytesIO exists on both Python 2 and 3; the old `StringIO` module
      # was removed in Python 3.
      from io import BytesIO
      # Decode the URL-safe base64 payload. On Python 3 this yields bytes, so
      # it is handed to pandas through a bytes buffer rather than a text one.
      str_csv  = base64.urlsafe_b64decode(data.encode('UTF-8'))
      df = pd.read_csv(BytesIO(str_csv))
      
    • That's it! You now have a pandas DataFrame with the contents of the CSV attachment. You can work with this DataFrame, or write it to disk with pd.DataFrame.to_csv if you simply want to download it. You can use the list of filenames obtained earlier if you want to preserve the original filename.

    0 讨论(0)
  • 2020-12-17 07:18

    I got it. This is not entirely my own work: I found several code snippets, combined them, and modified the result into the script below. In the end, it worked.

    # Download every attachment found in a Gmail account over IMAP and save
    # it into ./DataFiles, skipping files that already exist there.
    print('Proceeding')

    import email
    import getpass
    import imaplib
    import os
    import sys

    userName = 'yourgmail@gmail.com'
    # NOTE(review): avoid committing a real password here; getpass.getpass()
    # could be used to prompt for it instead.
    passwd = 'yourpassword'
    directory = '/full/path/to/the/directory'


    detach_dir = '.'
    output_dir = os.path.join(detach_dir, 'DataFiles')
    # Create the output folder on first run; a no-op when it already exists.
    os.makedirs(output_dir, exist_ok=True)


    try:
        imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
        try:
            typ, accountDetails = imapSession.login(userName, passwd)
            if typ != 'OK':
                print('Not able to sign in!')
                raise RuntimeError('IMAP login returned %s' % typ)

            # The mailbox name contains a space, so it must be sent quoted.
            typ, _ = imapSession.select('"[Gmail]/All Mail"')
            if typ != 'OK':
                raise RuntimeError('IMAP select returned %s' % typ)

            typ, data = imapSession.search(None, 'ALL')
            if typ != 'OK':
                print('Error searching Inbox.')
                raise RuntimeError('IMAP search returned %s' % typ)

            for msgId in data[0].split():
                typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
                if typ != 'OK':
                    print('Error fetching mail.')
                    raise RuntimeError('IMAP fetch returned %s' % typ)

                # fetch() returns the raw message as bytes, so parse it with
                # the bytes-based parser.
                emailBody = messageParts[0][1]
                mail = email.message_from_bytes(emailBody)
                for part in mail.walk():
                    # Containers and parts without a Content-Disposition
                    # header are not attachments.
                    if part.get_content_maintype() == 'multipart':
                        continue
                    if part.get('Content-Disposition') is None:
                        continue

                    fileName = part.get_filename()
                    if not fileName:
                        continue
                    # The filename comes from the e-mail, so keep only its
                    # last path component to stay inside output_dir.
                    fileName = os.path.basename(fileName)
                    if not fileName:
                        continue

                    filePath = os.path.join(output_dir, fileName)
                    if not os.path.isfile(filePath):
                        print(fileName)
                        with open(filePath, 'wb') as fp:
                            fp.write(part.get_payload(decode=True))
            imapSession.close()
        finally:
            # Always end the session, even when something above failed.
            imapSession.logout()

        print('Done')


    except (imaplib.IMAP4.error, OSError, RuntimeError) as exc:
        print('Not able to download all attachments.')
        # Report the reason on stderr instead of hiding it.
        print(exc, file=sys.stderr)
    
    0 讨论(0)
  • 2020-12-17 07:24
    from imap_tools import MailBox

    # Walk every message in INBOX and write each .csv attachment to disk.
    with MailBox('imap.my.ru').login('acc', 'pwd', 'INBOX') as mailbox:
        for message in mailbox.fetch():
            for attachment in message.attachments:
                # Skip anything whose name does not end in .csv (any case).
                if not attachment.filename.lower().endswith('.csv'):
                    continue
                target_path = 'C:/1/{}'.format(attachment.filename)
                with open(target_path, 'wb') as out_file:
                    out_file.write(attachment.payload)
    

    https://github.com/ikvk/imap_tools

    0 讨论(0)
  • 2020-12-17 07:27

    An up-to-date answer has been provided at "Download attachment from mail using Python":

    import os
    from imbox import Imbox # pip install imbox
    import traceback

    # Download every attachment from the inbox into download_folder.

    # enable less secure apps on your google account
    # https://myaccount.google.com/lesssecureapps
    # NOTE(review): Google has been retiring this setting; an app password
    # may be required instead. Confirm against current Google documentation.

    host = "imap.gmail.com"
    username = "username"
    password = 'password'
    download_folder = "/path/to/download/folder"

    # exist_ok=True already covers an existing folder, so no isdir check.
    os.makedirs(download_folder, exist_ok=True)

    mail = Imbox(host, username=username, password=password, ssl=True, ssl_context=None, starttls=False)
    try:
        messages = mail.messages() # defaults to inbox

        for (uid, message) in messages:
            mail.mark_seen(uid) # optional, mark message as read

            for attachment in message.attachments:
                try:
                    # The filename comes from the e-mail, so keep only its
                    # last path component to stay inside download_folder.
                    att_fn = os.path.basename(str(attachment.get('filename')))
                    download_path = f"{download_folder}/{att_fn}"
                    print(download_path)
                    with open(download_path, "wb") as fp:
                        fp.write(attachment.get('content').read())
                except Exception:
                    # Best effort: report the failure and carry on with the
                    # next attachment. print_exc() prints by itself.
                    traceback.print_exc()
    finally:
        # Log out even when listing or downloading fails.
        mail.logout()
    
    
    """
    Available Message filters: 
    
    # Gets all messages from the inbox
    messages = mail.messages()
    
    # Unread messages
    messages = mail.messages(unread=True)
    
    # Flagged messages
    messages = mail.messages(flagged=True)
    
    # Un-flagged messages
    messages = mail.messages(unflagged=True)
    
    # Messages sent FROM
    messages = mail.messages(sent_from='sender@example.org')
    
    # Messages sent TO
    messages = mail.messages(sent_to='receiver@example.org')
    
    # Messages received before specific date
    messages = mail.messages(date__lt=datetime.date(2018, 7, 31))
    
    # Messages received after specific date
    messages = mail.messages(date__gt=datetime.date(2018, 7, 30))
    
    # Messages received on a specific date
    messages = mail.messages(date__on=datetime.date(2018, 7, 30))
    
    # Messages whose subjects contain a string
    messages = mail.messages(subject='Christmas')
    
    # Messages from a specific folder
    messages = mail.messages(folder='Social')
    """
    
    0 讨论(0)
提交回复
热议问题