Parse multipart request string in Python

前端未结

关注

 5  1620

I have a string like this

\"--5b34210d81fb44c5a0fdc1a1e5ce42c3\\r\\nContent-Disposition: form-data; name=\\\"author\\\"\\r\\n\\r\\nJohn Smith\\r\\n--5b34210d


                      
              相关标签:


      
      
        
          5条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  忘掉有多难        
                
              
                            
                2020-12-10 10:04
              
            
            
                                                                       
If you want to use Python's CGI,
import base64
from cgi import parse_multipart, parse_header
from io import BytesIO

# NOTE: the cgi module is deprecated since Python 3.11 and removed in 3.13.

# Split the Content-Type header into the media type and its parameters
# (the parameters carry the multipart boundary).
c_type, c_data = parse_header(event['headers']['Content-Type'])
assert c_type == 'multipart/form-data'
# The body is base64-decoded before parsing (this needs the base64 import above).
decoded_string = base64.b64decode(event['body'])
# For Python 3 the next two adjustments are mandatory; see also:
# https://stackoverflow.com/questions/31486618/cgi-parse-multipart-function-throws-typeerror-in-python-3
c_data['boundary'] = bytes(c_data['boundary'], "utf-8")
c_data['CONTENT-LENGTH'] = event['headers']['Content-length']
form_data = parse_multipart(BytesIO(decoded_string), c_data)

# Each entry under 'file' is the content of one uploaded part.
for image_str in form_data['file']:
    ...

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  醉话见心        
                
              
                            
                2020-12-10 10:06
              
            
            
                                                                       
Expanding on sam-anthony's answer (I had to make some fixes for it to work on Python 3.6.8):

from requests_toolbelt.multipart import decoder

# Example multipart/form-data payload (as bytes) with two parts: a plain
# "author" field and a "file" field carrying example2.txt.
multipart_string = b"--ce560532019a77d83195f9e9873e16a1\r\nContent-Disposition: form-data; name=\"author\"\r\n\r\nJohn Smith\r\n--ce560532019a77d83195f9e9873e16a1\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example2.txt\"\r\nContent-Type: text/plain\r\nExpires: 0\r\n\r\nHello World\r\n--ce560532019a77d83195f9e9873e16a1--\r\n"
# The boundary declared here matches the delimiter used in the payload above.
content_type = "multipart/form-data; boundary=ce560532019a77d83195f9e9873e16a1"

# Decode once, then print the text of every part.
parsed = decoder.MultipartDecoder(multipart_string, content_type)
for part in parsed.parts:
    print(part.text)

John Smith
Hello World


What you'd have to do is install this library with "pip install requests-toolbelt --target=." and then upload it along with your Lambda script.

Here's a working example:

from requests_toolbelt.multipart import decoder

def lambda_handler(event, context):
    """Echo the text of every part of a multipart request body.

    Reads the ``Content-Type`` header and the ``body`` string from *event*,
    decodes the body with ``MultipartDecoder`` and returns a response dict
    whose ``body`` holds each part's text followed by a newline.
    *context* is not used.
    """
    header_value = event['headers']['Content-Type']
    # MultipartDecoder is given bytes, so the string body is encoded first.
    raw_body = event["body"].encode()

    multipart = decoder.MultipartDecoder(raw_body, header_value)
    lines = [part.text + "\n" for part in multipart.parts]

    return {
        'statusCode': 200,
        'body': ''.join(lines),
    }


This should be enough for your dependencies to be recognized. If they aren't, try using the "/python/lib/python3.6/site-packages" directory structure inside the zip, with your Python script at the root.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  南旧        
                
              
                            
                2020-12-10 10:08
              
            
            
                                                                       
It can be parsed by using something like

from requests_toolbelt.multipart import decoder
# MultipartDecoder expects the payload as bytes on Python 3, hence the b prefix
# on the literal (a plain str payload fails when it is split on the boundary).
multipart_string = b"--ce560532019a77d83195f9e9873e16a1\r\nContent-Disposition: form-data; name=\"author\"\r\n\r\nJohn Smith\r\n--ce560532019a77d83195f9e9873e16a1\r\nContent-Disposition: form-data; name=\"file\"; filename=\"example2.txt\"\r\nContent-Type: text/plain\r\nExpires: 0\r\n\r\nHello World\r\n--ce560532019a77d83195f9e9873e16a1--\r\n"
# The boundary declared here matches the delimiter used in the payload above.
content_type = "multipart/form-data; boundary=ce560532019a77d83195f9e9873e16a1"
decoder.MultipartDecoder(multipart_string, content_type)

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  孤街浪徒        
                
              
                            
                2020-12-10 10:10
              
            
            
                                                                       
If using CGI, I recommend using FieldStorage:
from cgi import FieldStorage

# Parse the request as a POSTed form and pick out the 'file' field.
# NOTE(review): cgi.FieldStorage requires `fp` to provide read()/readline();
# confirm event['body'] is a binary file-like object rather than a str.
fs = FieldStorage(
    fp=event['body'],
    headers=event['headers'],
    environ={
        'REQUEST_METHOD': 'POST',
        'CONTENT_TYPE': event['headers']['Content-Type'],
    },
)['file']
originalFileName = fs.filename
binaryFileData = fs.file.read()

see also:
https://stackoverflow.com/a/38718958/10913265
If the event body contains multiple files:
# Same lookup as for a single file; the 'file' field is read from the parsed form.
fs = FieldStorage(
    fp=event['body'],
    headers=event['headers'],
    environ={
        'REQUEST_METHOD': 'POST',
        'CONTENT_TYPE': event['headers']['Content-Type'],
    },
)['file']

delivers a list of FieldStorage objects. So you can do:
# fs is a list of FieldStorage objects here; read each one in turn.
for f in fs:
    originalFileName = f.filename  # file name recorded for this part
    binaryFileData = f.file.read()  # content read from this part's file object

Altogether, my solution for handling a single file, multiple files, or a body containing no file, while ensuring that the request is multipart/form-data:
from cgi import parse_header, FieldStorage

# See also: https://stackoverflow.com/a/56405982/10913265
# Check the media type up front; only multipart/form-data is handled below.
c_type, c_data = parse_header(event['headers']['Content-Type'])
assert c_type == 'multipart/form-data'

# See also: https://stackoverflow.com/a/38718958/10913265
fs = FieldStorage(fp=event['body'], headers=event['headers'], environ={'REQUEST_METHOD':'POST', 'CONTENT_TYPE':event['headers']['Content-Type'], })['file']

# With a single file or no file, fs is one FieldStorage object rather than a
# list; wrap it so the loop below handles every case uniformly.
if not isinstance(fs, list):
    fs = [fs]

for f in fs:
    originalFileName = f.filename
    # An empty file name means no file was submitted for this field.
    if originalFileName == '':
        continue
    binaryFileData = f.file.read()
    # Do something with the data

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  無奈伤痛        
                
              
                            
                2020-12-10 10:18
              
            
            
                                                                       
Had a bunch of weird encoding issues and also odd behavior with API Gateway: I originally received the body of the request as bytes, and then after redeploying started to receive it as base64. Anyway, this is the code that ended up working for me.

import json
import base64
import boto3
from requests_toolbelt.multipart import decoder

# Module-level S3 client, created when the module is imported.
s3client = boto3.client("s3")


def lambda_handler(event, context):
    """Store the first part of a base64-encoded multipart upload in S3.

    Reads the ``content-type`` header and the base64 ``body`` from *event*,
    decodes the multipart payload and uploads the raw bytes of its first part
    to bucket ``test`` under the key ``mypicturefinal.jpg``. *context* is not
    used. Returns a fixed success response dict.
    """
    content_type_header = event['headers']['content-type']
    # Hand the decoded bytes straight to the decoder and take each part's raw
    # ``content``; this yields the same bytes as decoding to text and
    # re-encoding, without the intermediate latin-1/utf-8 conversions.
    multipart = decoder.MultipartDecoder(base64.b64decode(event['body']), content_type_header)
    s3client.put_object(Body=multipart.parts[0].content, Bucket='test', Key='mypicturefinal.jpg')
    return {'statusCode': '200','body': 'Success', 'headers': { 'Content-Type': 'text/html' }}

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复