Java name parse library?

前端 未结 5 784
梦如初夏
梦如初夏 2021-01-07 20:58

I'm searching for a library similar in functionality to the Perl Lingua::EN::NameParse module. Essentially, I'd like to parse strings like 'Mr. Bob R. Smith' into prefix…

5条回答
  •  萌比男神i
    2021-01-07 21:33

    I just can't believe someone hasn't shared a library for this. I looked on GitHub, and there's a JavaScript name parser that could easily be translated to Java: https://github.com/joshfraser/JavaScript-Name-Parser

    I also modified the code in one of the answers to work a little better and have included a test case:

    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.commons.lang.StringUtils;
    
    /**
     * Splits a whitespace-separated personal name such as
     * {@code "Mr. Bob R. Smith, Jr."} into leading titles, first name, middle
     * name(s), last name and trailing titles.
     *
     * <p>Rules applied by {@link #parse(String)}:
     * <ul>
     *   <li>Words ending in a period that appear before the first name and start
     *       with a known prefix ("Mr.", "Dr.", ...) are leading titles.</li>
     *   <li>The first remaining word is the first name.</li>
     *   <li>Trailing words that equal a known suffix ("Jr.", "IV", "M.D.", ...)
     *       are trailing titles; the word before them is the last name.</li>
     *   <li>Everything between first and last name is the middle name.</li>
     * </ul>
     *
     * <p>An instance is mutable and reusable: every call to {@code parse}
     * replaces the results of the previous call.
     */
    public class NameParser {
        /** Lower-case title prefixes; a word matches if it starts with one of these. */
        private static final String[] PREFIXES = { "dr", "mr", "ms", "atty", "prof", "miss", "mrs" };

        /**
         * Lower-case suffixes, compared for equality after punctuation is removed.
         * "vii", "viii" and "esquire" are listed explicitly because the previous
         * starts-with comparison accepted them implicitly.
         */
        private static final String[] SUFFIXES = { "jr", "sr", "ii", "iii", "iv", "v", "vi", "vii", "viii", "esq",
                "esquire", "2nd", "3rd", "jd", "phd", "md", "cpa" };

        private String firstName = "";
        private String lastName = "";
        private String middleName = "";
        private List<String> middleNames = new ArrayList<String>();
        private List<String> titlesBefore = new ArrayList<String>();
        private List<String> titlesAfter = new ArrayList<String>();

        /** Creates a parser with all name parts empty. */
        public NameParser() {
        }

        /**
         * Creates a parser and immediately parses {@code name}.
         *
         * @param name the full name to parse; may be {@code null} or blank
         */
        public NameParser(String name) {
            parse(name);
        }

        /** Clears every result of a previous parse. */
        private void reset() {
            firstName = "";
            lastName = "";
            middleName = "";
            middleNames = new ArrayList<String>();
            titlesBefore = new ArrayList<String>();
            titlesAfter = new ArrayList<String>();
        }

        /** Returns true if {@code text} is null, empty, or only whitespace. */
        private static boolean isBlank(String text) {
            if (text == null) {
                return true;
            }
            for (int i = 0; i < text.length(); i++) {
                if (!Character.isWhitespace(text.charAt(i))) {
                    return false;
                }
            }
            return true;
        }

        /** Returns true if {@code word}, ignoring case, starts with a known title prefix. */
        private static boolean isPrefix(String word) {
            // Locale.ROOT keeps the comparison independent of the default locale
            // (e.g. Turkish lower-cases 'I' to a dotless 'i').
            String lower = word.toLowerCase(java.util.Locale.ROOT);
            for (String prefix : PREFIXES) {
                if (lower.startsWith(prefix)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Returns true if {@code word} is a known suffix once case, periods and
         * commas are ignored ("Jr.", "M.D." and "PhD" all match). The match is
         * exact so that surnames such as "Vickers" or "Ivanov" are not mistaken
         * for the suffixes "v" or "iv".
         */
        private static boolean isSuffix(String word) {
            String key = word.toLowerCase(java.util.Locale.ROOT).replace(".", "").replace(",", "");
            for (String suffix : SUFFIXES) {
                if (key.equals(suffix)) {
                    return true;
                }
            }
            return false;
        }

        /** Removes any commas at the end of {@code word}. */
        private static String stripTrailingCommas(String word) {
            int end = word.length();
            while (end > 0 && word.charAt(end - 1) == ',') {
                end--;
            }
            return word.substring(0, end);
        }

        /** Joins {@code parts} with single spaces; an empty list gives "". */
        private static String joinWithSpaces(List<String> parts) {
            StringBuilder joined = new StringBuilder();
            for (String part : parts) {
                if (joined.length() > 0) {
                    joined.append(' ');
                }
                joined.append(part);
            }
            return joined.toString();
        }

        /**
         * Parses {@code name} and stores the parts for retrieval through the
         * getters. Any results from an earlier call are discarded first, so a
         * {@code null} or blank name leaves every part empty.
         *
         * @param name the full name; words may be separated by any whitespace
         */
        public void parse(String name) {
            reset();
            if (isBlank(name)) {
                return;
            }

            boolean haveFirstName = false;
            for (String token : name.split("\\s+")) {
                if (isBlank(token)) {
                    continue;
                }
                if (token.endsWith(".")) {
                    if (haveFirstName) {
                        middleNames.add(token);
                    } else if (isPrefix(token)) {
                        titlesBefore.add(token);
                    } else {
                        // A dotted word that is not a title, e.g. the initial in "B. Herbert".
                        firstName = token;
                        haveFirstName = true;
                    }
                    continue;
                }

                String word = stripTrailingCommas(token);
                if (word.length() == 0) {
                    // The token was nothing but commas; it carries no name part.
                    continue;
                }
                if (haveFirstName) {
                    middleNames.add(word);
                } else {
                    firstName = word;
                    haveFirstName = true;
                }
            }

            extractLastNameAndSuffixes();
            middleName = joinWithSpaces(middleNames);
        }

        /**
         * Moves trailing suffixes and the last name out of {@code middleNames}.
         * Entries are consumed by position from the end, so an earlier middle
         * name equal to the last name is left in place.
         */
        private void extractLastNameAndSuffixes() {
            int idx = middleNames.size() - 1;
            while (idx >= 0 && isSuffix(middleNames.get(idx))) {
                titlesAfter.add(middleNames.get(idx));
                idx--;
            }
            if (idx >= 0) {
                lastName = middleNames.get(idx);
                idx--;
            } else if (!titlesAfter.isEmpty()) {
                // Every word after the first name looked like a suffix; treat the
                // one closest to the first name as the last name ("Malcolm V").
                lastName = titlesAfter.remove(titlesAfter.size() - 1);
            }
            middleNames.subList(idx + 1, middleNames.size()).clear();
        }

        /** @return the first name, or "" if nothing has been parsed */
        public String getFirstName() {
            return firstName;
        }

        /** @return the last name, or "" if the name had only one word */
        public String getLastName() {
            return lastName;
        }

        /** @return all middle names joined by single spaces, or "" if there are none */
        public String getMiddleName() {
            return middleName;
        }

        /** @return the leading titles in the order they appeared; this is the parser's own list, not a copy */
        public List<String> getTitlesBefore() {
            return titlesBefore;
        }

        /**
         * @return the trailing titles, last one first (reverse of their order in
         *         the name); this is the parser's own list, not a copy
         */
        public List<String> getTitlesAfter() {
            return titlesAfter;
        }

    }
    

    Test case:

    import junit.framework.Assert;
    
    import org.junit.Test;
    
    /**
     * Table-driven test for {@code NameParser}: each row gives a full name and
     * the first, middle and last name the parser is expected to produce.
     */
    public class NameParserTest {

        /** One row of test data: the input name and its expected parts. */
        private static final class TestData {
            final String name;

            final String firstName;
            final String lastName;
            final String middleName;

            TestData(String name, String firstName, String middleName, String lastName) {
                this.name = name;
                this.firstName = firstName;
                this.lastName = lastName;
                this.middleName = middleName;
            }

        }

        /**
         * Parses every sample with a single reused parser and checks the three
         * name parts. The input name is part of each assertion message so a
         * failure identifies the offending row.
         */
        @Test
        public void test() {

            TestData[] samples = { new TestData("Henry \"Hank\" J. Fasthoff IV", "Henry", "\"Hank\" J.", "Fasthoff"),
                    new TestData("April A. (Caminez) Bentley", "April", "A. (Caminez)", "Bentley"),
                    new TestData("fff lll", "fff", "", "lll"),
                    new TestData("fff mmmmm lll", "fff", "mmmmm", "lll"),
                    new TestData("fff mmm1      mm2 lll", "fff", "mmm1 mm2", "lll"),
                    new TestData("Mr. Dr. Tom Jones", "Tom", "", "Jones"),
                    new TestData("Robert P. Bethea Jr.", "Robert", "P.", "Bethea"),
                    new TestData("Charles P. Adams, Jr.", "Charles", "P.", "Adams"),
                    new TestData("B. Herbert Boatner, Jr.", "B.", "Herbert", "Boatner"),
                    new TestData("Bernard H. Booth IV", "Bernard", "H.", "Booth"),
                    new TestData("F. Laurens \"Larry\" Brock", "F.", "Laurens \"Larry\"", "Brock"),
                    new TestData("Chris A. D'Amour", "Chris", "A.", "D'Amour") };

            NameParser parser = new NameParser();
            for (TestData sample : samples) {
                parser.parse(sample.name);
                Assert.assertEquals("first name of [" + sample.name + "]", sample.firstName, parser.getFirstName());
                Assert.assertEquals("last name of [" + sample.name + "]", sample.lastName, parser.getLastName());
                Assert.assertEquals("middle name of [" + sample.name + "]", sample.middleName,
                        parser.getMiddleName());
            }
        }

    }
    

提交回复
热议问题