Question
I'm taking data from a spout, and each bolt inserts mapped fields into a different table in my database. The tables have constraints: in my test setup there are two tables, user_details and My_details, and the constraint requires the user row to be inserted first; only after that can My_details be inserted. When I run the topology, only the users table gets filled, because the constraints only allow PsqlBolt to insert first, and PsqlBolt1 throws an exception saying the user id is not found. So I put a Thread.sleep(1000) in PsqlBolt1, and with that the two bolts work. But when I apply the same approach to many bolts (12), the waiting time keeps growing and bolt execution fails with a message that the bolt wait time is exceeded. How can I make the user fields get inserted first, and only then have PsqlBolt1 start inserting?
My topology class
public class Topology {
    ConnectionProvider cp;
    protected static final String JDBC_CONF = "jdbc.conf";
    protected static final String TABLE_NAME = "users";
    protected static final String SELECT_QUERY =
            "select dept_name from department, user_department where department.dept_id = user_department.dept_id" +
            " and user_department.user_id = ?";

    public static void main(String[] args) throws Exception {
        String argument = args[0];
        JdbcMapper jdbcMapper;
        TopologyBuilder builder = new TopologyBuilder();

        Map map = Maps.newHashMap();
        map.put("dataSourceClassName", "org.postgresql.ds.PGSimpleDataSource");
        map.put("dataSource.url", "jdbc:postgresql://localhost:5432/twitter_analysis?user=postgres");
        ConnectionProvider cp = new MyConnectionProvider(map);

        jdbcMapper = new SimpleJdbcMapper(TABLE_NAME, cp);
        List<Column> schemaColumns = Lists.newArrayList(
                new Column("user_id", Types.INTEGER),
                new Column("user_name", Types.VARCHAR),
                new Column("create_date", Types.TIMESTAMP));
        JdbcMapper mapper = new SimpleJdbcMapper(schemaColumns);
        PsqlBolt userPersistanceBolt = new PsqlBolt(cp, mapper)
                .withInsertQuery("insert into user_details (id, user_name, created_timestamp) values (?,?,?)");
        builder.setSpout("myspout", new UserSpout(), 1);
        builder.setBolt("Psql_Bolt", userPersistanceBolt, 1).shuffleGrouping("myspout");

        jdbcMapper = new SimpleJdbcMapper("My_details", cp);
        List<Column> schemaColumns1 = Lists.newArrayList(
                new Column("my_id", Types.INTEGER),
                new Column("my_name", Types.VARCHAR));
        JdbcMapper mapper1 = new SimpleJdbcMapper(schemaColumns1);
        PsqlBolt1 userPersistanceBolt1 = new PsqlBolt1(cp, mapper1)
                .withInsertQuery("insert into My_details (my_id, my_name) values (?,?)");
        //builder.setSpout("myspout", new UserSpout(), 1);
        builder.setBolt("Psql_Bolt1", userPersistanceBolt1, 1).shuffleGrouping("myspout");

        Config conf = new Config();
        conf.put(JDBC_CONF, map);
        conf.setDebug(true);
        conf.setNumWorkers(3);

        if (argument.equalsIgnoreCase("runLocally")) {
            System.out.println("Running topology locally...");
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Twitter Test Storm-postgresql", conf, builder.createTopology());
        } else {
            System.out.println("Running topology on cluster...");
            StormSubmitter.submitTopology("Topology_psql", conf, builder.createTopology());
        }
    }
}
My bolts: PsqlBolt1
public class PsqlBolt1 extends AbstractJdbcBolt {
    private static final Logger LOG = Logger.getLogger(PsqlBolt1.class);

    private String tableName;
    private String insertQuery;
    private JdbcMapper jdbcMapper;

    public PsqlBolt1(ConnectionProvider connectionProvider, JdbcMapper jdbcMapper) {
        super(connectionProvider);
        this.jdbcMapper = jdbcMapper;
    }

    public PsqlBolt1 withInsertQuery(String insertQuery) {
        this.insertQuery = insertQuery;
        System.out.println("query passsed.....");
        return this;
    }

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector collector) {
        super.prepare(map, topologyContext, collector);
        if (StringUtils.isBlank(tableName) && StringUtils.isBlank(insertQuery)) {
            throw new IllegalArgumentException("You must supply either a tableName or an insert Query.");
        }
    }

    @Override
    public void execute(Tuple tuple) {
        try {
            Thread.sleep(1000);
            List<Column> columns = jdbcMapper.getColumns(tuple);
            List<List<Column>> columnLists = new ArrayList<List<Column>>();
            columnLists.add(columns);
            if (!StringUtils.isBlank(tableName)) {
                this.jdbcClient.insert(this.tableName, columnLists);
            } else {
                this.jdbcClient.executeInsertQuery(this.insertQuery, columnLists);
            }
            this.collector.ack(tuple);
        } catch (Exception e) {
            this.collector.reportError(e);
            this.collector.fail(tuple);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
PsqlBolt:
public class PsqlBolt extends AbstractJdbcBolt {
    private static final Logger LOG = Logger.getLogger(PsqlBolt.class);

    private String tableName;
    private String insertQuery;
    private JdbcMapper jdbcMapper;

    public PsqlBolt(ConnectionProvider connectionProvider, JdbcMapper jdbcMapper) {
        super(connectionProvider);
        this.jdbcMapper = jdbcMapper;
    }

    public PsqlBolt withTableName(String tableName) {
        this.tableName = tableName;
        return this;
    }

    public PsqlBolt withInsertQuery(String insertQuery) {
        this.insertQuery = insertQuery;
        System.out.println("query passsed.....");
        return this;
    }

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector collector) {
        super.prepare(map, topologyContext, collector);
        if (StringUtils.isBlank(tableName) && StringUtils.isBlank(insertQuery)) {
            throw new IllegalArgumentException("You must supply either a tableName or an insert Query.");
        }
    }

    @Override
    public void execute(Tuple tuple) {
        try {
            List<Column> columns = jdbcMapper.getColumns(tuple);
            List<List<Column>> columnLists = new ArrayList<List<Column>>();
            columnLists.add(columns);
            if (!StringUtils.isBlank(tableName)) {
                this.jdbcClient.insert(this.tableName, columnLists);
            } else {
                this.jdbcClient.executeInsertQuery(this.insertQuery, columnLists);
            }
            this.collector.ack(tuple);
        } catch (Exception e) {
            this.collector.reportError(e);
            this.collector.fail(tuple);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
When I applied the same approach to many bolts, the topology colour in the Storm UI turns red (wait state).
Here are the bolt wait times: the first bolt has no sleep, the second bolt sleeps for 1 second, and all the remaining bolts sleep for 2 seconds.
How can I replace that sleep with something that still does the work in the right order, or will increasing the number of supervisors solve the problem?
Answer 1:
You can restructure your topology so that the spout sends a message M to bolt 1. Bolt 1 takes some action on this message and forwards the same message to bolt 2 only if that action succeeds. This way there is a strict ordering between the actions, as sketched below.
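A minimal sketch of that restructuring, based on the code in the question (the forwarded field names are an assumption about what UserSpout actually emits, and only the changed lines are shown): PsqlBolt re-emits the incoming tuple's values only after its own insert succeeds, and Psql_Bolt1 subscribes to Psql_Bolt instead of the spout, so the child-table insert can never run before the parent-table insert.

// In PsqlBolt.execute(), forward the tuple downstream only after the insert has succeeded,
// anchoring the emit to the input tuple so a failure anywhere in the chain is replayed from the spout.
this.jdbcClient.executeInsertQuery(this.insertQuery, columnLists);
this.collector.emit(tuple, tuple.getValues()); // same values go on to the next bolt
this.collector.ack(tuple);

// PsqlBolt now has to declare the fields it forwards (names assumed to match the spout's output).
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("user_id", "user_name", "create_date", "my_id", "my_name"));
}

// In the Topology class, wire Psql_Bolt1 behind Psql_Bolt instead of behind the spout.
builder.setBolt("Psql_Bolt", userPersistanceBolt, 1).shuffleGrouping("myspout");
builder.setBolt("Psql_Bolt1", userPersistanceBolt1, 1).shuffleGrouping("Psql_Bolt");

With this wiring the Thread.sleep(1000) in PsqlBolt1 can be dropped, and the same pattern extends to a longer chain: each bolt subscribes to the previous one and only emits once its own insert has gone through.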
Answer 2:
I had missed the point that each bolt is meant to perform a different function on the tuples. I was writing different insert queries in different bolts that all performed the same function: inserting tuples coming from the spout. Once I realized the bolts were not actually doing anything different, I implemented all the insert queries inside a single bolt instead of using multiple insert-query bolts; after mapping all the fields I simply ran the insert statements in the sequence I wanted, one by one.
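A minimal sketch of that single-bolt approach, assuming a hypothetical combined bolt (say PsqlCombinedBolt, extending AbstractJdbcBolt like the bolts above) whose execute() runs the dependent inserts in order; the table, column, and tuple field names are taken from the question and may need adjusting:

@Override
public void execute(Tuple tuple) {
    try {
        // Parent row first, so the foreign-key constraint on My_details is satisfied.
        List<Column> userColumns = Lists.newArrayList(
                new Column("id", tuple.getIntegerByField("user_id"), Types.INTEGER),
                new Column("user_name", tuple.getStringByField("user_name"), Types.VARCHAR),
                new Column("created_timestamp", tuple.getValueByField("create_date"), Types.TIMESTAMP));
        this.jdbcClient.executeInsertQuery(
                "insert into user_details (id, user_name, created_timestamp) values (?,?,?)",
                Collections.singletonList(userColumns));

        // Dependent row second, inside the same execute() call, so the order is guaranteed per tuple.
        List<Column> myColumns = Lists.newArrayList(
                new Column("my_id", tuple.getIntegerByField("my_id"), Types.INTEGER),
                new Column("my_name", tuple.getStringByField("my_name"), Types.VARCHAR));
        this.jdbcClient.executeInsertQuery(
                "insert into My_details (my_id, my_name) values (?,?)",
                Collections.singletonList(myColumns));

        this.collector.ack(tuple);
    } catch (Exception e) {
        this.collector.reportError(e);
        this.collector.fail(tuple);
    }
}

Because the ordering now lives inside one execute() call, no sleeps are needed, and throughput can still be tuned by raising that single bolt's parallelism hint.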
Source: https://stackoverflow.com/questions/35079414/how-to-excute-one-bolt-after-the-other-when-each-bolt-takes-data-from-same-spout