问题
I am trying to create a Python script in NiFi that:
- Reads some attributes from an incoming flowfile
- Reads the JSON content of the flowfile & extracts specific fields
- Write attributes to outgoing flowfile
- Overwrite incoming flowfile with new content that is created in the script (e.g. API call that returns new json) and send it to SUCCESS relationship OR remove the old flowfile and create new with desired content
What I've done so far:
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback,InputStreamCallback, OutputStreamCallback
class OutputWrite(OutputStreamCallback):
    """Output-stream callback that writes a Python object to a flowfile as JSON.

    The object to serialize is handed over through the constructor
    (``OutputWrite(obj)``) and stored on the instance; it must not appear in
    the base-class list, which may only name the callback interface.
    """

    def __init__(self, obj):
        """Remember *obj* so that ``process`` can serialize it later."""
        self.obj = obj

    def process(self, outputStream):
        """Write the stored object to *outputStream* as UTF-8 encoded JSON."""
        outputStream.write(bytearray(json.dumps(self.obj).encode('utf-8')))
###end class###
flowfile = session.get()
if flowfile is not None:
    # 1) Get flowfile attributes
    headers = {
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': 'application/json, text/plain, */*',
        'Cache-Control': 'no-cache',
        'Ocp-Apim-Trace': 'true',
        'Authorization': flowfile.getAttribute('Authorization')
    }
    collection = flowfile.getAttribute('collection')
    dataset = flowfile.getAttribute('dataset')

    # 2) Get flowfile content
    # session.read(flowfile) returns a plain InputStream whose lifecycle is
    # the caller's responsibility; close it before the flowfile is written
    # to or transferred.
    stream_content = session.read(flowfile)
    try:
        text_content = IOUtils.toString(stream_content, StandardCharsets.UTF_8)
    finally:
        stream_content.close()
    json_content = json.loads(text_content)
    records = json_content['result']['count']
    # NOTE(review): with an integer count this is floor division under
    # Jython 2.x - confirm whether a partial last page should be counted.
    pages = records/10000

    # 3) Write flowfile attributes
    flowfile = session.putAttribute(flowfile, 'collection', collection)
    flowfile = session.putAttribute(flowfile, 'dataset', dataset)

    # API operations: output_json with desired data
    output_json = {}  # placeholder for {some data} produced by the API calls

    # 4) Write final JSON data to output flowfile
    flowfile = session.write(flowfile, OutputWrite(output_json))
    session.transfer(flowfile, REL_SUCCESS)
    session.commit()
My problem is that I can't find a way to pass a reference to the desired output_json object as an argument to the OutputStreamCallback class. Any ideas on how to resolve this, or is there maybe a better approach?
Would it be easier in this case to perform all API operations within the process function of the class? If so, how do I get access to the incoming flowfile attributes within the process function (which requires a session or a flowfile object)?
Any help much appreciated!
回答1:
I've included example Python code below which allows for a custom PyStreamCallback class which implements logic to transform JSON in the flowfile content (taken from Matt Burgess' blog article on the topic), but I would encourage you to consider using native processors such as UpdateAttribute and EvaluateJsonPath to perform the relevant activities, and only use custom code where it is specifically needed to perform a task that NiFi doesn't handle out of the box.
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class PyStreamCallback(StreamCallback):
    """Stream callback that reshapes the flowfile's JSON "rating" document.

    The incoming content is parsed as JSON; the primary rating value and
    every non-primary rating are copied into a new structure, which is
    written back as indented JSON.
    """

    def __init__(self):
        pass

    def process(self, inputStream, outputStream):
        """Read JSON from *inputStream*, write the translated JSON to *outputStream*."""
        source = json.loads(IOUtils.toString(inputStream, StandardCharsets.UTF_8))

        translated = {
            "Range": 5,
            "Rating": source['rating']['primary']['value'],
            "SecondaryRatings": {}
        }

        # Everything except the primary rating becomes a secondary rating.
        secondary = translated['SecondaryRatings']
        for name, entry in source['rating'].iteritems():
            if name == "primary":
                continue
            secondary[name] = {"Id": name, "Range": 5, "Value": entry['value']}

        payload = json.dumps(translated, indent=4)
        outputStream.write(bytearray(payload.encode('utf-8')))
# Take the next flowfile (if any), rewrite its content through the callback,
# rename it to "<base>_translated.json" and route it to success.
flowFile = session.get()
if flowFile is not None:
    flowFile = session.write(flowFile, PyStreamCallback())
    base_name = flowFile.getAttribute('filename').split('.')[0]
    flowFile = session.putAttribute(flowFile, "filename", base_name + '_translated.json')
    session.transfer(flowFile, REL_SUCCESS)
Update:
To access the attributes of the flowfile within the callback, simply pass it as an argument to the constructor, store it as a field, and reference it within the process
method. Here is a very simple example that concatenates the value of attribute my_attr
to the incoming flowfile content and writes it back:
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
class PyStreamCallback(StreamCallback):
    """Stream callback that appends the 'my_attr' attribute to the content.

    The flowfile is passed to the constructor and kept on the instance so
    that its attributes can be read while the content is being rewritten.
    """

    def __init__(self, flowfile):
        self.ff = flowfile

    def process(self, inputStream, outputStream):
        """Copy the incoming content to *outputStream* followed by 'my_attr'."""
        content = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        suffix = self.ff.getAttribute('my_attr')
        combined = content + suffix
        outputStream.write(bytearray(combined.encode('utf-8')))
# Take the next flowfile (if any), let the callback rewrite its content with
# access to the flowfile's attributes, then route it to success.
flowFile = session.get()
if flowFile is not None:
    callback = PyStreamCallback(flowFile)
    flowFile = session.write(flowFile, callback)
    session.transfer(flowFile, REL_SUCCESS)
Incoming flowfile:
--------------------------------------------------
Standard FlowFile Attributes
Key: 'entryDate'
Value: 'Tue Mar 13 13:10:48 PDT 2018'
Key: 'lineageStartDate'
Value: 'Tue Mar 13 13:10:48 PDT 2018'
Key: 'fileSize'
Value: '30'
FlowFile Attribute Map Content
Key: 'filename'
Value: '1690494181462176'
Key: 'my_attr'
Value: 'This is an attribute value.'
Key: 'path'
Value: './'
Key: 'uuid'
Value: 'dc93b715-50a0-43ce-a4db-716bd9ec3205'
--------------------------------------------------
This is some flowfile content.
Outgoing flowfile:
--------------------------------------------------
Standard FlowFile Attributes
Key: 'entryDate'
Value: 'Tue Mar 13 13:10:48 PDT 2018'
Key: 'lineageStartDate'
Value: 'Tue Mar 13 13:10:48 PDT 2018'
Key: 'fileSize'
Value: '57'
FlowFile Attribute Map Content
Key: 'filename'
Value: '1690494181462176'
Key: 'my_attr'
Value: 'This is an attribute value.'
Key: 'path'
Value: './'
Key: 'uuid'
Value: 'dc93b715-50a0-43ce-a4db-716bd9ec3205'
--------------------------------------------------
This is some flowfile content.This is an attribute value.
回答2:
You can try something like this-
import json
import sys
import traceback
from java.nio.charset import StandardCharsets
from org.apache.commons.io import IOUtils
from org.apache.nifi.processor.io import StreamCallback
from org.python.core.util import StringUtil
class TransformCallback(StreamCallback):
    """Stream callback skeleton: parse JSON content, transform it, write it back.

    Any error raised while processing is printed with its traceback to
    stdout and then re-raised unchanged.
    """

    def __init__(self):
        pass

    def process(self, inputStream, outputStream):
        """Read JSON from *inputStream* and write the transformed JSON to *outputStream*."""
        try:
            # Read input FlowFile content
            input_text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
            input_obj = json.loads(input_text)

            # Transform content
            output_obj = input_obj  # your input content
            # perform data transformation on output_obj

            # Write output content (serialize the transformed object; the
            # previously referenced name `outputJson` was never defined)
            output_text = json.dumps(output_obj)
            outputStream.write(StringUtil.toBytes(output_text))
        except:
            # Bare except kept on purpose: the error is only logged here and
            # is re-raised as-is.
            # NOTE(review): under Jython this presumably also covers Java
            # exceptions thrown by the stream calls - confirm before narrowing.
            traceback.print_exc(file=sys.stdout)
            raise
# Take the next flowfile (if any) and replace its content via the callback.
flowFile = session.get()
if flowFile is not None:
    callback = TransformCallback()
    flowFile = session.write(flowFile, callback)

    # Finish by transferring the FlowFile to an output relationship
    session.transfer(flowFile, REL_SUCCESS)
来源:https://stackoverflow.com/questions/49235028/python-executescript-in-nifi-transform-flowfile-attributes-content