Spring Batch : Parsing a CSV file with quoteCharacter

问题

I'm new to Spring Batch. We know that CSV files come in all forms and shapes… and some of them are syntactically incorrect. I'm trying to parse a CSV file in which each line starts with '"' and ends with '"'. This is my CSV:

"1;Paris;13/4/1992;16/7/2006"
"2;Lyon;31/5/1993;1/8/2009"
"3;Metz;21/4/1990;27/4/2010"

I tried this :

  <!-- Flat-file reader for data-1.txt; each line is handed to the lineMapper configured below. -->
  <bean id="itemReader" class="org.springframework.batch.item.file.FlatFileItemReader">
    <property name="resource" value="data-1.txt" />
    <property name="lineMapper">
      <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
        <property name="fieldSetMapper">
          <!-- Maps the individual fields of a tokenized record to the properties of a POJO. -->
          <bean class="com.sam.fourthTp.MyFieldSetMapper" />
        </property>
        <property name="lineTokenizer">
          <!-- Splits each input record into fields on the configured delimiter character. -->
          <!-- The quote character (a double quote, written with its XML escape) wraps individual fields, -->
          <!-- so this setup handles "a";"b" but not a record that is quoted as a whole, such as "a;b". -->
          <bean class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
            <property name="quoteCharacter" value="&quot;" />
            <property name="delimiter" value=";" />
          </bean>
        </property>
      </bean>
    </property>
  </bean>

But this only works when the CSV file looks like this:

"1";"Paris";"13/4/1992";"16/7/2006"
"2";"Lyon";"31/5/1993";"1/8/2009"
"3";"Metz";"21/4/1990";"27/4/2010"

My question is: how can I parse my CSV when each line starts with '"' and ends with '"'?

回答1:

The quoteCharacter is, as you mentioned, applicable to fields, not records.

My question is: how can I parse my CSV when each line starts with '"' and ends with '"'?

What you can do is:

Read lines as raw Strings
Use a composite item processor with two delegates: one that trims the " from the start/end of each record, and another one that parses the line and maps it to your domain object

Here is a quick example:

import java.util.Arrays;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.item.file.mapping.BeanWrapperFieldSetMapper;
import org.springframework.batch.item.file.transform.DelimitedLineTokenizer;
import org.springframework.batch.item.file.transform.FieldSet;
import org.springframework.batch.item.support.CompositeItemProcessor;
import org.springframework.batch.item.support.ListItemReader;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.AnnotationConfigApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Self-contained Spring Batch sample that parses records which are wrapped as a whole
 * in double quotes, e.g. {@code "1;Paris;13/4/1992;16/7/2006"}.
 *
 * <p>The tokenizer's quote character applies to individual fields, not to whole records,
 * so the enclosing quotes are removed in a first processing stage before the line is
 * tokenized and mapped to a {@link Record} in a second stage.
 */
@Configuration
@EnableBatchProcessing
public class MyJob {

    /** Character that encloses each whole record in the input. */
    private static final char RECORD_QUOTE = '"';

    @Autowired
    private JobBuilderFactory jobs;

    @Autowired
    private StepBuilderFactory steps;

    /**
     * Supplies the raw, still-quoted lines. An in-memory list stands in for the file
     * so that the sample is runnable without external resources.
     *
     * @return a reader that emits each raw line as a {@code String}
     */
    @Bean
    public ItemReader<String> itemReader() {
        return new ListItemReader<>(Arrays.asList(
                "\"1;Paris;13/4/1992;16/7/2006\"",
                "\"2;Lyon;31/5/1993;1/8/2009\"",
                "\"3;Metz;21/4/1990;27/4/2010\"",
                "\"4;Lille;21/4/1980;27/4/2011\""
                ));
    }

    /**
     * First processing stage: removes the quotes that enclose the whole record.
     *
     * @return a processor that returns the line without its enclosing quotes
     */
    @Bean
    public ItemProcessor<String, String> itemProcessor1() {
        return MyJob::stripEnclosingQuotes;
    }

    /**
     * Removes one leading and one trailing quote, but only when both are present.
     *
     * <p>An unconditional {@code substring(1, length - 1)} throws on empty or
     * one-character lines and silently drops real data from lines that are not quoted;
     * such lines are returned unchanged here instead.
     *
     * @param line the raw line as read from the input
     * @return the line without its enclosing quotes, or the line itself if it is not enclosed
     */
    private static String stripEnclosingQuotes(String line) {
        int last = line.length() - 1;
        boolean enclosed = last >= 1
                && line.charAt(0) == RECORD_QUOTE
                && line.charAt(last) == RECORD_QUOTE;
        return enclosed ? line.substring(1, last) : line;
    }

    /**
     * Second processing stage: splits the unquoted line on {@code ;} and binds the
     * named tokens to a {@link Record}.
     *
     * @return a processor that turns an unquoted line into a {@link Record}
     */
    @Bean
    public ItemProcessor<String, Record> itemProcessor2() {
        DelimitedLineTokenizer lineTokenizer = new DelimitedLineTokenizer();
        // Only the first two columns are named; the sample lines carry four.
        lineTokenizer.setNames("id", "ville");
        lineTokenizer.setDelimiter(";");
        // Non-strict so that the token count does not have to equal the number of names.
        lineTokenizer.setStrict(false);
        BeanWrapperFieldSetMapper<Record> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
        fieldSetMapper.setTargetType(Record.class);
        return item -> {
            FieldSet tokens = lineTokenizer.tokenize(item);
            return fieldSetMapper.mapFieldSet(tokens);
        };
    }

    /**
     * Writes every mapped record to standard output.
     *
     * @return a writer that prints each item of the chunk
     */
    @Bean
    public ItemWriter<Record> itemWriter() {
        return items -> {
            for (Record item : items) {
                System.out.println(item);
            }
        };
    }

    /**
     * Chains the two stages: quote stripping first, then tokenizing and mapping.
     *
     * @return the composite processor used by the step
     */
    @Bean
    public CompositeItemProcessor<String, Record> compositeItemProcessor() {
        CompositeItemProcessor<String, Record> compositeItemProcessor = new CompositeItemProcessor<>();
        compositeItemProcessor.setDelegates(Arrays.asList(itemProcessor1(), itemProcessor2()));
        return compositeItemProcessor;
    }

    /**
     * Wires reader, composite processor and writer into a chunk-oriented step
     * with a chunk size of 2.
     *
     * @return the single step of the job
     */
    @Bean
    public Step step() {
        return steps.get("step")
                .<String, Record>chunk(2)
                .reader(itemReader())
                .processor(compositeItemProcessor())
                .writer(itemWriter())
                .build();
    }

    /**
     * Defines the job, which consists of the single step above.
     *
     * @return the job named {@code job}
     */
    @Bean
    public Job job() {
        return jobs.get("job")
                .start(step())
                .build();
    }

    /** Simple mutable POJO that holds the two mapped columns of a line. */
    public static class Record {

        private int id;
        private String ville;

        /** No-argument constructor. */
        public Record() {
        }

        public int getId() {
            return id;
        }

        public void setId(int id) {
            this.id = id;
        }

        public String getVille() {
            return ville;
        }

        public void setVille(String ville) {
            this.ville = ville;
        }

        @Override
        public String toString() {
            return "Record{" +
                    "id=" + id +
                    ", ville='" + ville + '\'' +
                    '}';
        }
    }

    /**
     * Boots the application context from this configuration class and launches the job
     * with empty job parameters.
     *
     * @param args unused
     * @throws Exception if the job cannot be launched
     */
    public static void main(String[] args) throws Exception {
        ApplicationContext context = new AnnotationConfigApplicationContext(MyJob.class);
        JobLauncher jobLauncher = context.getBean(JobLauncher.class);
        Job job = context.getBean(Job.class);
        jobLauncher.run(job, new JobParameters());
    }

}

I used a simple POJO called Record and mapped only two fields. This sample prints:

Record{id=1, ville='Paris'}
Record{id=2, ville='Lyon'}
Record{id=3, ville='Metz'}
Record{id=4, ville='Lille'}

Hope this helps.

来源：https://stackoverflow.com/questions/55730974/spring-batch-parsing-a-csv-file-with-quotecharacter

标签

java

Spring

spring-batch