问题
I have a big XML file and I am parsing it as shown below:
/**
 * Reads every {@code *.xml} chat dump in a folder with StAX and writes one summary line per
 * {@code <Conversation>} element to a text file.
 *
 * <p>Within a conversation only {@code <Message>} elements contribute users and message text;
 * {@code <LoginName>} elements that sit under other children of the conversation are not read.
 */
public class Solution {
    private static final String ROOM_ID = "RoomID";
    private static final String CONTENT = "Content";
    private static final String LOGIN_NAME = "LoginName";
    private static final String CONVERSATION_ID = "ConversationID";
    private static final String FILE_DUMP = "FileDump";
    private static final String MESSAGE = "Message";
    private static final String CONVERSATION = "Conversation";
    private static final String START_TIME = "StartTime";

    /** Folder scanned when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_DIR = "/xml/";
    /** File written when no second command-line argument is given. */
    private static final String DEFAULT_OUTPUT_FILE = "output.txt";

    /** Everything collected for one {@code <Conversation>} element. */
    static class ConversationInfo {
        private String startTimeStr;
        private String conversationId;
        private String fileName;
        private int orderInFile;
        private final Set<String> users = new HashSet<>();
        private final List<Message> messages = new ArrayList<>();
        /**
         * Login name mapped to the content of that user's most recent message (each new message
         * overwrites the previous entry). Use {@link #messageCountSummary()} for per-user counts.
         */
        public HashMap<String, String> map = new HashMap<>();

        /**
         * Records one parsed message: appends it to the message list, registers its sender and
         * refreshes the sender's entry in {@link #map}.
         *
         * @param message the message to record
         */
        void addMessage(Message message) {
            messages.add(message);
            users.add(message.userName);
            map.put(message.userName, message.content);
        }

        /**
         * Builds the per-user message count, e.g. {@code G_LO(1)+FCHAN95(6)+WVU(1)+SWONG00(1)}.
         * Users appear in the order of their first message.
         *
         * @return the summary, or an empty string when no message was recorded
         */
        public String messageCountSummary() {
            // Insertion-ordered so the output follows the order in which users first spoke.
            java.util.Map<String, Integer> counts = new java.util.LinkedHashMap<>();
            for (Message message : messages) {
                Integer seen = counts.get(message.userName);
                counts.put(message.userName, seen == null ? 1 : seen + 1);
            }
            StringBuilder summary = new StringBuilder();
            for (java.util.Map.Entry<String, Integer> entry : counts.entrySet()) {
                if (summary.length() > 0) {
                    summary.append('+');
                }
                summary.append(entry.getKey()).append('(').append(entry.getValue()).append(')');
            }
            return summary.toString();
        }

        /**
         * Formats file name, order, conversation id, start time, the users joined with
         * {@code ***}, the user count and the messages joined with {@code &&&}.
         */
        @Override
        public String toString() {
            return String.format("%s %d %s %s %s %d %s", fileName, orderInFile, conversationId,
                    startTimeStr, join(users, "***"), users.size(),
                    join(messages, "&&&"));
        }
    }

    /** One chat message: the sender's login name and the trimmed message text. */
    static class Message {
        public final String userName;
        public final String content;

        public Message(String name, String content) {
            this.userName = name;
            this.content = content;
        }

        @Override
        public String toString() {
            return userName + " " + content;
        }
    }

    /**
     * Parses all {@code .xml} files of the input folder and writes the numbered conversation
     * summaries to the output file.
     *
     * @param args optional: {@code args[0]} is the input folder (default {@code /xml/}),
     *             {@code args[1]} is the output file (default {@code output.txt})
     * @throws XMLStreamException if a file cannot be parsed
     * @throws IOException if the folder cannot be listed or a file cannot be read or written
     */
    public static void main(String[] args)
            throws XMLStreamException, IOException {
        int argCount = args == null ? 0 : args.length;
        File folder = new File(argCount > 0 ? args[0] : DEFAULT_INPUT_DIR);
        String outputFile = argCount > 1 ? args[1] : DEFAULT_OUTPUT_FILE;

        // listFiles() returns null (not an empty array) when the path is not a readable directory.
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            throw new IOException("Cannot list files in " + folder
                    + ": not a directory or not readable");
        }

        XMLInputFactory xf = newInputFactory();
        List<ConversationInfo> conversations = new ArrayList<>();
        for (File file : listOfFiles) {
            System.out.println("File" + file);
            if (file.isFile() && file.getName().endsWith(".xml")) {
                parseFile(xf, file, conversations);
            }
        }

        try (FileWriter w = new FileWriter(outputFile)) {
            int i = 1;
            for (ConversationInfo convInfo : conversations) {
                // The running number is stored first because toString() prints it as well.
                convInfo.orderInFile = i;
                w.write(String.format("%d %s\n", i++, convInfo));
            }
        }
    }

    /** Creates the shared StAX factory with DTD and external-entity processing switched off. */
    private static XMLInputFactory newInputFactory() {
        XMLInputFactory xf = XMLInputFactory.newFactory();
        xf.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        xf.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return xf;
    }

    /** Appends every conversation of one dump file to {@code sink}; stops at the closing FileDump tag. */
    private static void parseFile(XMLInputFactory xf, File file, List<ConversationInfo> sink)
            throws XMLStreamException, IOException {
        try (FileInputStream fin = new FileInputStream(file)) {
            XMLStreamReader xr = xf.createXMLStreamReader(fin);
            try {
                while (xr.hasNext()) {
                    switch (xr.next()) {
                        case XMLStreamConstants.START_ELEMENT:
                            if (CONVERSATION.equals(xr.getLocalName())) {
                                sink.add(parseConversation(xr, file.getName()));
                            }
                            break;
                        case XMLStreamConstants.END_ELEMENT:
                            if (FILE_DUMP.equals(xr.getLocalName())) {
                                return;
                            }
                            break;
                        case XMLStreamConstants.END_DOCUMENT:
                            throw new IllegalStateException("xml not well-formed: <"
                                    + FILE_DUMP + "> tag not closed");
                        default:
                            break;
                    }
                }
            } finally {
                // XMLStreamReader is not AutoCloseable, so it has to be released by hand.
                xr.close();
            }
        }
    }

    /**
     * Consumes events up to the closing {@code </Conversation>} tag and collects the room id,
     * the start time and all messages found on the way.
     *
     * @param xr reader positioned on the opening {@code <Conversation>} tag
     * @param fileName name of the file being parsed, stored in the result
     * @return the populated conversation, never {@code null}
     * @throws XMLStreamException if the document ends before the conversation is closed
     */
    private static ConversationInfo parseConversation(XMLStreamReader xr, String fileName)
            throws XMLStreamException {
        ConversationInfo convInfo = new ConversationInfo();
        convInfo.fileName = fileName;
        while (xr.hasNext()) {
            switch (xr.next()) {
                case XMLStreamConstants.START_ELEMENT: {
                    String elName = xr.getLocalName();
                    if (MESSAGE.equals(elName)) {
                        convInfo.addMessage(parseMessage(xr));
                    } else if (START_TIME.equals(elName)) {
                        convInfo.startTimeStr = xr.getElementText();
                    } else if (ROOM_ID.equals(elName)) {
                        convInfo.conversationId = xr.getElementText();
                    }
                    break;
                }
                case XMLStreamConstants.END_ELEMENT:
                    if (CONVERSATION.equals(xr.getLocalName())) {
                        return convInfo;
                    }
                    break;
                case XMLStreamConstants.END_DOCUMENT:
                    throw new XMLStreamException("xml not well-formed: <"
                            + CONVERSATION + "> tag not closed");
                default:
                    break;
            }
        }
        throw new XMLStreamException(
                "unexpected end of xml file while parsing a conversation");
    }

    /**
     * Consumes events up to the closing {@code </Message>} tag and extracts the sender's login
     * name and the trimmed content. Either value stays {@code null} when its element is absent.
     *
     * @param xr reader positioned on the opening {@code <Message>} tag
     * @return the parsed message, never {@code null}
     * @throws XMLStreamException if the document ends before the message is closed
     */
    private static Message parseMessage(XMLStreamReader xr)
            throws XMLStreamException {
        String userName = null;
        String content = null;
        while (xr.hasNext()) {
            switch (xr.next()) {
                case XMLStreamConstants.START_ELEMENT: {
                    String elName = xr.getLocalName();
                    if (LOGIN_NAME.equals(elName)) {
                        userName = xr.getElementText();
                    } else if (CONTENT.equals(elName)) {
                        String text = xr.getElementText();
                        content = text == null ? "" : text.trim();
                    }
                    break;
                }
                case XMLStreamConstants.END_ELEMENT:
                    if (MESSAGE.equals(xr.getLocalName())) {
                        return new Message(userName, content);
                    }
                    break;
                case XMLStreamConstants.END_DOCUMENT:
                    throw new XMLStreamException("xml not well-formed: <"
                            + MESSAGE + "> tag not closed");
                default:
                    break;
            }
        }
        throw new XMLStreamException(
                "unexpected end of xml file while parsing a message");
    }

    /** Joins the string forms of {@code items} with {@code separator}; null items contribute nothing. */
    private static String join(Iterable<?> items, String separator) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (Object item : items) {
            if (!first) {
                joined.append(separator);
            }
            first = false;
            if (item != null) {
                joined.append(item);
            }
        }
        return joined.toString();
    }
}
and my input.xml is:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Data provided by Bloomberg LP. -->
<FileDump>
<Version>IBXML 1.3</Version>
<Conversation Perspective=" " RoomType="P">
<RoomID>PCHAT-0x3000001CA8361</RoomID>
<StartTime>03/31/2016 13:39:01</StartTime>
<StartTimeUTC>1459431541</StartTimeUTC>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 13:39:01</DateTime>
<DateTimeUTC>1459431541</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 14:56:22</DateTime>
<DateTimeUTC>1459436182</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantLeft InteractionType="N" DeviceType="M">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 19:30:01</DateTime>
<DateTimeUTC>1459452601</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantLeft>
<ParticipantLeft InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 19:33:56</DateTime>
<DateTimeUTC>1459452836</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantLeft>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 19:45:16</DateTime>
<DateTimeUTC>1459453516</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantLeft InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 23:08:09</DateTime>
<DateTimeUTC>1459465689</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantLeft>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>03/31/2016 23:14:23</DateTime>
<DateTimeUTC>1459466063</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<Message InteractionType="N">
<User>
<LoginName>G_LO</LoginName>
<FirstName>GARY</FirstName>
<LastName>LO</LastName>
<UUID>7054548</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>G_LO@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:10:57</DateTime>
<DateTimeUTC>1459469457</DateTimeUTC>
<Content>abcdefgghhhhhh</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>WVU</LoginName>
<FirstName>WHEELOCK</FirstName>
<LastName>VU</LastName>
<UUID>8266852</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>WVU@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>WHEELOCKVU@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:14:05</DateTime>
<DateTimeUTC>1459469645</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<ParticipantEntered InteractionType="N">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 00:29:19</DateTime>
<DateTimeUTC>1459470559</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<Message InteractionType="N">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 00:29:19</DateTime>
<DateTimeUTC>1459470559</DateTimeUTC>
<Content>ajdakjgdljsgdsafhkafa</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 00:29:19</DateTime>
<DateTimeUTC>1459470559</DateTimeUTC>
<Content>akjdgljsafdlshf;kdsjf</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N">
<User>
<LoginName>WVU</LoginName>
<FirstName>WHEELOCK</FirstName>
<LastName>VU</LastName>
<UUID>8266852</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>91189</AccountNumber>
<CompanyName>DBS BANK (HONG KONG)</CompanyName>
<EmailAddress>WVU@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>WHEELOCKVU@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 00:39:32</DateTime>
<DateTimeUTC>1459471172</DateTimeUTC>
<Content>sagdksajdlsahd</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<ParticipantEntered InteractionType="N" DeviceType="M">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 01:01:27</DateTime>
<DateTimeUTC>1459472487</DateTimeUTC>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</ParticipantEntered>
<Message InteractionType="N">
<User>
<LoginName>SWONG00</LoginName>
<FirstName>STEPHEN</FirstName>
<LastName>WONG</LastName>
<UUID>4397109</UUID>
<FirmNumber>13133</FirmNumber>
<AccountNumber>231115</AccountNumber>
<CompanyName>DBS BANK LIMITED HON</CompanyName>
<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>
<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress>
</User>
<DateTime>04/01/2016 01:31:29</DateTime>
<DateTimeUTC>1459474289</DateTimeUTC>
<Content>ajdslsahdsj;a</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 02:49:46</DateTime>
<DateTimeUTC>1459478986</DateTimeUTC>
<Content>sagdkjsagdkjashdlasjd</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 02:49:46</DateTime>
<DateTimeUTC>1459478986</DateTimeUTC>
<Content>jsdhkshdksjdlsjdlks</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 03:47:37</DateTime>
<DateTimeUTC>1459482457</DateTimeUTC>
<Content>jshdkshdksjdlskld</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<Message InteractionType="N" DeviceType="M">
<User>
<LoginName>FCHAN95</LoginName>
<FirstName>FLORENCE</FirstName>
<LastName>CHAN</LastName>
<CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>
<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>
<CorporateEmailAddress />
</User>
<DateTime>04/01/2016 03:47:37</DateTime>
<DateTimeUTC>1459482457</DateTimeUTC>
<Content>aasasasasas</Content>
<ConversationID>PCHAT-0x3000001CA8361</ConversationID>
</Message>
<EndTime>04/01/2016 03:47:37</EndTime>
<EndTimeUTC>1459482457</EndTimeUTC>
</Conversation>
</FileDump>
Currently I am displaying each user and the message content, but I want to print, for every user, the number of messages that user sent, in the form
userName(countOfMessagesUserSent)+userName(countOfMessagesUserSent)
Example: G_LO(1)+FCHAN95(6)+WVU(1)+SWONG00(1)
I tried a HashMap<String, Integer>,
but it is not working as expected. Java 8 features are not working either. I also tried Guava's Multiset,
but to no avail.
回答1:
You can achieve this quite easily using SAX (note: SAX, not StAX). You should be able to do it with StAX as well; I will post an update when I have done so. A reference implementation using SAX is as follows:
import java.io.StringReader;
import java.util.HashMap;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that counts how many times each login name occurs in an IBXML chat dump.
 *
 * <p>Every {@code LoginName} element is counted, whichever element encloses it, so entries
 * under {@code ParticipantEntered} and {@code ParticipantLeft} are included alongside those
 * under {@code Message}.
 */
public class NumCountHandler extends DefaultHandler {
    private static final String LOGIN_NAME = "LoginName";

    // Building blocks of the sample document parsed by main().
    private static final String SAMPLE_CONVERSATION_ID = "PCHAT-0x3000001CA8361";
    private static final String ATTRS_N = "InteractionType=\"N\"";
    private static final String ATTRS_N_M = "InteractionType=\"N\" DeviceType=\"M\"";
    private static final String USER_SWONG00 =
            "<User><LoginName>SWONG00</LoginName><FirstName>STEPHEN</FirstName>"
            + "<LastName>WONG</LastName><UUID>4397109</UUID><FirmNumber>13133</FirmNumber>"
            + "<AccountNumber>231115</AccountNumber><CompanyName>DBS BANK LIMITED HON</CompanyName>"
            + "<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress> </User>";
    private static final String USER_G_LO =
            "<User><LoginName>G_LO</LoginName><FirstName>GARY</FirstName>"
            + "<LastName>LO</LastName><UUID>7054548</UUID><FirmNumber>13133</FirmNumber>"
            + "<AccountNumber>91189</AccountNumber><CompanyName>DBS BANK (HONG KONG)</CompanyName>"
            + "<EmailAddress>G_LO@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress> </User>";
    private static final String USER_WVU =
            "<User><LoginName>WVU</LoginName><FirstName>WHEELOCK</FirstName>"
            + "<LastName>VU</LastName><UUID>8266852</UUID><FirmNumber>13133</FirmNumber>"
            + "<AccountNumber>91189</AccountNumber><CompanyName>DBS BANK (HONG KONG)</CompanyName>"
            + "<EmailAddress>WVU@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress>WHEELOCKVU@DBS.COM</CorporateEmailAddress> </User>";
    private static final String USER_FCHAN95 =
            "<User><LoginName>FCHAN95</LoginName><FirstName>FLORENCE</FirstName>"
            + "<LastName>CHAN</LastName><CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>"
            + "<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress /> </User>";

    private HashMap<String, Integer> countOfNum = new HashMap<String, Integer>();
    boolean isStartTagPass = false;
    /** Text collected for the LoginName element that is currently open. */
    private final StringBuilder loginNameText = new StringBuilder();

    /** Starts collecting text when a {@code LoginName} element opens (name match ignores case). */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (qName.equalsIgnoreCase(LOGIN_NAME)) {
            isStartTagPass = true;
            loginNameText.setLength(0);
        }
    }

    /**
     * Buffers text that belongs to the open {@code LoginName} element. A parser may report one
     * text node through several calls, so nothing is counted here.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (isStartTagPass) {
            loginNameText.append(ch, start, length);
        }
    }

    /** Counts the buffered login name once its element closes; an element without text is not counted. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equalsIgnoreCase(LOGIN_NAME)) {
            isStartTagPass = false;
            if (loginNameText.length() > 0) {
                countOfNum.merge(loginNameText.toString(), 1, Integer::sum);
            }
        }
    }

    /**
     * Returns a snapshot of the counts gathered so far.
     *
     * @return a new map from login name to number of occurrences
     */
    public HashMap<String, Integer> getCounts() {
        return new HashMap<>(countOfNum);
    }

    /** Parses the built-in sample conversation and prints each login name as {@code name(count) }. */
    public static void main(String[] args) {
        try {
            InputSource is = new InputSource(new StringReader(sampleXml()));
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();
            NumCountHandler userhandler = new NumCountHandler();
            saxParser.parse(is, userhandler);
            userhandler.countOfNum
                    .forEach((k, v) -> System.out.print(k + "(" + v + ") "));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Assembles the sample dump as one line of XML from the shared user and event fragments. */
    private static String sampleXml() {
        return "<FileDump> <Version>IBXML 1.3</Version> <Conversation Perspective=\" \" RoomType=\"P\">"
                + " <RoomID>" + SAMPLE_CONVERSATION_ID + "</RoomID>"
                + " <StartTime>03/31/2016 13:39:01</StartTime> <StartTimeUTC>1459431541</StartTimeUTC>"
                + event("ParticipantEntered", ATTRS_N_M, USER_SWONG00, "03/31/2016 13:39:01", "1459431541", null)
                + event("ParticipantEntered", ATTRS_N_M, USER_G_LO, "03/31/2016 14:56:22", "1459436182", null)
                + event("ParticipantLeft", ATTRS_N_M, USER_G_LO, "03/31/2016 19:30:01", "1459452601", null)
                + event("ParticipantLeft", ATTRS_N_M, USER_SWONG00, "03/31/2016 19:33:56", "1459452836", null)
                + event("ParticipantEntered", ATTRS_N_M, USER_SWONG00, "03/31/2016 19:45:16", "1459453516", null)
                + event("ParticipantLeft", ATTRS_N_M, USER_SWONG00, "03/31/2016 23:08:09", "1459465689", null)
                + event("ParticipantEntered", ATTRS_N_M, USER_G_LO, "03/31/2016 23:14:23", "1459466063", null)
                + event("Message", ATTRS_N, USER_G_LO, "04/01/2016 00:10:57", "1459469457", "abcdefgghhhhhh")
                + event("ParticipantEntered", ATTRS_N_M, USER_WVU, "04/01/2016 00:14:05", "1459469645", null)
                + event("ParticipantEntered", ATTRS_N, USER_FCHAN95, "04/01/2016 00:29:19", "1459470559", null)
                + event("Message", ATTRS_N, USER_FCHAN95, "04/01/2016 00:29:19", "1459470559", "ajdakjgdljsgdsafhkafa")
                + event("Message", ATTRS_N, USER_FCHAN95, "04/01/2016 00:29:19", "1459470559", "akjdgljsafdlshf;kdsjf")
                + event("Message", ATTRS_N, USER_WVU, "04/01/2016 00:39:32", "1459471172", "sagdksajdlsahd")
                + event("ParticipantEntered", ATTRS_N_M, USER_SWONG00, "04/01/2016 01:01:27", "1459472487", null)
                + event("Message", ATTRS_N, USER_SWONG00, "04/01/2016 01:31:29", "1459474289", "ajdslsahdsj;a")
                + event("Message", ATTRS_N_M, USER_FCHAN95, "04/01/2016 02:49:46", "1459478986", "sagdkjsagdkjashdlasjd")
                + event("Message", ATTRS_N_M, USER_FCHAN95, "04/01/2016 02:49:46", "1459478986", "jsdhkshdksjdlsjdlks")
                + event("Message", ATTRS_N_M, USER_FCHAN95, "04/01/2016 03:47:37", "1459482457", "jshdkshdksjdlskld")
                + event("Message", ATTRS_N_M, USER_FCHAN95, "04/01/2016 03:47:37", "1459482457", "aasasasasas")
                + " <EndTime>04/01/2016 03:47:37</EndTime> <EndTimeUTC>1459482457</EndTimeUTC>"
                + " </Conversation></FileDump>";
    }

    /** Renders one conversation child element; {@code content} is omitted when {@code null}. */
    private static String event(String tag, String attributes, String user,
            String dateTime, String dateTimeUtc, String content) {
        StringBuilder xml = new StringBuilder();
        xml.append(" <").append(tag).append(' ').append(attributes).append("> ").append(user);
        xml.append(" <DateTime>").append(dateTime).append("</DateTime>");
        xml.append(" <DateTimeUTC>").append(dateTimeUtc).append("</DateTimeUTC>");
        if (content != null) {
            xml.append(" <Content>").append(content).append("</Content>");
        }
        xml.append(" <ConversationID>").append(SAMPLE_CONVERSATION_ID).append("</ConversationID>");
        xml.append(" </").append(tag).append('>');
        return xml.toString();
    }
}
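This prints the counts in the form WVU(2) G_LO(4) FCHAN95(7) SWONG00(6) (entries are separated by spaces, and their order depends on the HashMap's iteration order). Note that these totals count every LoginName element, including those inside ParticipantEntered and ParticipantLeft, not only the ones inside Message.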
=====Added a StaX based implementation==========
This can definitely be improved with Java 8 features. Also, I have used static members just for a small pilot; it should be possible to do the same with instance variables after some refactoring.
import java.io.StringReader;
import java.util.HashMap;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;
/**
 * Counts how often each {@code <LoginName>} value occurs in an XML document.
 *
 * <p>The document is read with the StAX event API, so it is streamed event by event instead of
 * being loaded into memory as a tree.
 */
public class NumCountHandlerStax {

    /** Local name of the element whose text values are counted. */
    private static final String LOGIN_NAME = "LoginName";

    /** {@code <ConversationID>} element repeated in every event of the sample conversation. */
    private static final String SAMPLE_CONVERSATION_ID =
            "<ConversationID>PCHAT-0x3000001CA8361</ConversationID>";

    /** {@code <User>} element of the sample participant SWONG00. */
    private static final String USER_SWONG00 =
            "<User><LoginName>SWONG00</LoginName><FirstName>STEPHEN</FirstName>"
            + "<LastName>WONG</LastName><UUID>4397109</UUID><FirmNumber>13133</FirmNumber>"
            + "<AccountNumber>231115</AccountNumber><CompanyName>DBS BANK LIMITED HON</CompanyName>"
            + "<EmailAddress>SWONG00@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress>STEPHENWONGWE@DBS.COM</CorporateEmailAddress> </User>";

    /** {@code <User>} element of the sample participant G_LO. */
    private static final String USER_G_LO =
            "<User><LoginName>G_LO</LoginName><FirstName>GARY</FirstName>"
            + "<LastName>LO</LastName><UUID>7054548</UUID><FirmNumber>13133</FirmNumber>"
            + "<AccountNumber>91189</AccountNumber><CompanyName>DBS BANK (HONG KONG)</CompanyName>"
            + "<EmailAddress>G_LO@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress>garyloyc@dbs.com</CorporateEmailAddress> </User>";

    /** {@code <User>} element of the sample participant WVU. */
    private static final String USER_WVU =
            "<User><LoginName>WVU</LoginName><FirstName>WHEELOCK</FirstName>"
            + "<LastName>VU</LastName><UUID>8266852</UUID><FirmNumber>13133</FirmNumber>"
            + "<AccountNumber>91189</AccountNumber><CompanyName>DBS BANK (HONG KONG)</CompanyName>"
            + "<EmailAddress>WVU@Bloomberg.net</EmailAddress>"
            + "<CorporateEmailAddress>WHEELOCKVU@DBS.COM</CorporateEmailAddress> </User>";

    /** {@code <User>} element of the sample participant FCHAN95. */
    private static final String USER_FCHAN95 =
            "<User><LoginName>FCHAN95</LoginName><FirstName>FLORENCE</FirstName>"
            + "<LastName>CHAN</LastName><CompanyName>GOLDMAN SACHS (ASIA)</CompanyName>"
            + "<EmailAddress>FCHAN95@Bloomberg.net</EmailAddress><CorporateEmailAddress /> </User>";

    /**
     * Sample conversation dump processed by {@link #main(String[])}. It is assembled from the
     * constants above because the same {@code <User>} elements repeat in many events; the
     * resulting text is one single-line XML document.
     */
    private static final String SAMPLE_XML =
            "<FileDump> <Version>IBXML 1.3</Version> <Conversation Perspective=\" \" RoomType=\"P\"> "
            + "<RoomID>PCHAT-0x3000001CA8361</RoomID> <StartTime>03/31/2016 13:39:01</StartTime> "
            + "<StartTimeUTC>1459431541</StartTimeUTC> "
            + "<ParticipantEntered InteractionType=\"N\" DeviceType=\"M\"> " + USER_SWONG00
            + " <DateTime>03/31/2016 13:39:01</DateTime> <DateTimeUTC>1459431541</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<ParticipantEntered InteractionType=\"N\" DeviceType=\"M\"> " + USER_G_LO
            + " <DateTime>03/31/2016 14:56:22</DateTime> <DateTimeUTC>1459436182</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<ParticipantLeft InteractionType=\"N\" DeviceType=\"M\"> " + USER_G_LO
            + " <DateTime>03/31/2016 19:30:01</DateTime> <DateTimeUTC>1459452601</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantLeft> "
            + "<ParticipantLeft InteractionType=\"N\" DeviceType=\"M\"> " + USER_SWONG00
            + " <DateTime>03/31/2016 19:33:56</DateTime> <DateTimeUTC>1459452836</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantLeft> "
            + "<ParticipantEntered InteractionType=\"N\" DeviceType=\"M\"> " + USER_SWONG00
            + " <DateTime>03/31/2016 19:45:16</DateTime> <DateTimeUTC>1459453516</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<ParticipantLeft InteractionType=\"N\" DeviceType=\"M\"> " + USER_SWONG00
            + " <DateTime>03/31/2016 23:08:09</DateTime> <DateTimeUTC>1459465689</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantLeft> "
            + "<ParticipantEntered InteractionType=\"N\" DeviceType=\"M\"> " + USER_G_LO
            + " <DateTime>03/31/2016 23:14:23</DateTime> <DateTimeUTC>1459466063</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<Message InteractionType=\"N\"> " + USER_G_LO
            + " <DateTime>04/01/2016 00:10:57</DateTime> <DateTimeUTC>1459469457</DateTimeUTC> "
            + "<Content>abcdefgghhhhhh</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<ParticipantEntered InteractionType=\"N\" DeviceType=\"M\"> " + USER_WVU
            + " <DateTime>04/01/2016 00:14:05</DateTime> <DateTimeUTC>1459469645</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<ParticipantEntered InteractionType=\"N\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 00:29:19</DateTime> <DateTimeUTC>1459470559</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<Message InteractionType=\"N\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 00:29:19</DateTime> <DateTimeUTC>1459470559</DateTimeUTC> "
            + "<Content>ajdakjgdljsgdsafhkafa</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<Message InteractionType=\"N\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 00:29:19</DateTime> <DateTimeUTC>1459470559</DateTimeUTC> "
            + "<Content>akjdgljsafdlshf;kdsjf</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<Message InteractionType=\"N\"> " + USER_WVU
            + " <DateTime>04/01/2016 00:39:32</DateTime> <DateTimeUTC>1459471172</DateTimeUTC> "
            + "<Content>sagdksajdlsahd</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<ParticipantEntered InteractionType=\"N\" DeviceType=\"M\"> " + USER_SWONG00
            + " <DateTime>04/01/2016 01:01:27</DateTime> <DateTimeUTC>1459472487</DateTimeUTC> "
            + SAMPLE_CONVERSATION_ID + " </ParticipantEntered> "
            + "<Message InteractionType=\"N\"> " + USER_SWONG00
            + " <DateTime>04/01/2016 01:31:29</DateTime> <DateTimeUTC>1459474289</DateTimeUTC> "
            + "<Content>ajdslsahdsj;a</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<Message InteractionType=\"N\" DeviceType=\"M\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 02:49:46</DateTime> <DateTimeUTC>1459478986</DateTimeUTC> "
            + "<Content>sagdkjsagdkjashdlasjd</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<Message InteractionType=\"N\" DeviceType=\"M\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 02:49:46</DateTime> <DateTimeUTC>1459478986</DateTimeUTC> "
            + "<Content>jsdhkshdksjdlsjdlks</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<Message InteractionType=\"N\" DeviceType=\"M\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 03:47:37</DateTime> <DateTimeUTC>1459482457</DateTimeUTC> "
            + "<Content>jshdkshdksjdlskld</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<Message InteractionType=\"N\" DeviceType=\"M\"> " + USER_FCHAN95
            + " <DateTime>04/01/2016 03:47:37</DateTime> <DateTimeUTC>1459482457</DateTimeUTC> "
            + "<Content>aasasasasas</Content> " + SAMPLE_CONVERSATION_ID + " </Message> "
            + "<EndTime>04/01/2016 03:47:37</EndTime> <EndTimeUTC>1459482457</EndTimeUTC> "
            + "</Conversation></FileDump>";

    /** True while the reader is between a {@code LoginName} start tag and its end tag. */
    private boolean isStartTagPass = false;

    /** Text collected so far for the {@code LoginName} element currently being read. */
    private final StringBuilder loginNameText = new StringBuilder();

    /**
     * Feeds one StAX event into the counter.
     *
     * <p>Text is buffered between the start and end tag and only counted at the end tag. A StAX
     * reader may deliver the text of one element as several {@code Characters} events (for
     * example around entity references when coalescing is off), so counting each event
     * separately would record fragments of a name as separate names.
     *
     * @param event the event just read from the document
     * @param countOfNum map from login name to number of occurrences; updated in place
     * @param staxBasedCounter holder of the parsing state shared between events
     */
    private static void groupAndProcess(XMLEvent event, Map<String, Integer> countOfNum,
            NumCountHandlerStax staxBasedCounter) {
        if (event.isStartElement()
                && LOGIN_NAME.equals(event.asStartElement().getName().getLocalPart())) {
            staxBasedCounter.isStartTagPass = true;
            staxBasedCounter.loginNameText.setLength(0);
        } else if (event.isEndElement()
                && LOGIN_NAME.equals(event.asEndElement().getName().getLocalPart())) {
            staxBasedCounter.isStartTagPass = false;
            // An element without any text never produced a count before; keep it that way.
            if (staxBasedCounter.loginNameText.length() > 0) {
                countOfNum.merge(staxBasedCounter.loginNameText.toString(), 1, Integer::sum);
            }
        } else if (staxBasedCounter.isStartTagPass && event.isCharacters()) {
            staxBasedCounter.loginNameText.append(event.asCharacters().getData());
        }
    }

    /**
     * Counts the occurrences of every distinct {@code <LoginName>} text value in a document.
     *
     * @param xml the complete XML document as text
     * @return a map from login name to the number of {@code <LoginName>} elements carrying it
     * @throws XMLStreamException if the document is not well-formed or cannot be read
     */
    public static Map<String, Integer> countLoginNames(String xml) throws XMLStreamException {
        NumCountHandlerStax staxBasedCounter = new NumCountHandlerStax();
        Map<String, Integer> countOfNum = new HashMap<>();
        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(new StringReader(xml));
        try {
            while (xmlEventReader.hasNext()) {
                groupAndProcess(xmlEventReader.nextEvent(), countOfNum, staxBasedCounter);
            }
        } finally {
            // XMLEventReader is not AutoCloseable, so release it explicitly.
            xmlEventReader.close();
        }
        return countOfNum;
    }

    /**
     * Counts the login names in the embedded sample document and prints each one as
     * {@code name(count)} followed by a space.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            countLoginNames(SAMPLE_XML).forEach((k, v) -> System.out.print(k + "(" + v + ") "));
        } catch (XMLStreamException e) {
            e.printStackTrace();
        }
    }
}
Prints `WVU(2) G_LO(4) FCHAN95(7) SWONG00(6)`
回答2:
Using an XSLT 3.0 processor like Saxon 9.7 EE you could do it in a declarative way with a stylesheet using an accumulator:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:math="http://www.w3.org/2005/xpath-functions/math"
xmlns:map="http://www.w3.org/2005/xpath-functions/map"
exclude-result-prefixes="xs math map"
version="3.0">
<!-- Counts the LoginName text nodes found under Message/User, grouped by login name,
     and writes the resulting map as text output. -->
<!-- Accumulator holding a map from login name (xs:string) to count (xs:integer).
     It starts as an empty map and is declared streamable. -->
<xsl:accumulator name="message-count" as="map(xs:string, xs:integer)" initial-value="map{}" streamable="yes">
<!-- For every matched text node: $value is the map accumulated so far. If the name is
     already a key, store its count plus one; otherwise store it with a count of 1. -->
<xsl:accumulator-rule
match="Message/User/LoginName/text()"
select="if (map:contains($value, .))
then map:put($value, string(), map:get($value, .) + 1)
else map:put($value, string(), 1)"/>
</xsl:accumulator>
<!-- Default mode is declared streamable; nodes without a matching template rule are
     handled with the shallow-skip built-in behaviour. -->
<xsl:mode streamable="yes" on-no-match="shallow-skip"/>
<!-- Makes the message-count accumulator applicable to the streamed global context item. -->
<xsl:global-context-item streamable="yes" use-accumulators="message-count"/>
<xsl:output method="text"/>
<!-- On the document element: process the content first, then serialize the accumulator's
     final value with the adaptive output method. -->
<xsl:template match="/*">
<xsl:apply-templates/>
<xsl:value-of select="serialize(accumulator-after('message-count'), map { 'method' : 'adaptive' })"/>
</xsl:template>
</xsl:stylesheet>
Output for the sample you have posted is map{"SWONG00":1,"FCHAN95":6,"WVU":1,"G_LO":1}.
回答3:
This is an example based on XPath and VTD-XML. VTD-XML will not blow up on big XML files the way DOM does. XPath makes the code logic easy to understand and simple to maintain. If you want to count something different, just throw in a different XPath query.
import java.util.HashMap;
import com.ximpleware.*;.
/**
 * Counts how many {@code Message} elements each login name has in a conversation dump,
 * using VTD-XML and a single XPath query.
 */
public class stats {

    /**
     * Parses the dump file, evaluates the XPath for the login name of every message and
     * prints a map from login name to number of matches.
     *
     * @param s ignored
     * @throws VTDException if navigating the document or evaluating the XPath fails
     */
    public static void main(String[] s) throws VTDException {
        VTDGen vg = new VTDGen();
        if (!vg.parseFile("d:\\xml\\dump.xml", false)) {
            System.out.println("parsing error");
            return;
        }
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        HashMap<String, Integer> hit = new HashMap<String, Integer>();
        ap.selectXPath("/FileDump/Conversation/Message/User/LoginName/text()");
        int i;
        // evalXPath() yields the index of the next matching node, or -1 when there are no more.
        while ((i = ap.evalXPath()) != -1) {
            // merge() inserts 1 for a new name and adds 1 to an existing count.
            hit.merge(vn.toNormalizedString(i), 1, Integer::sum);
        }
        System.out.println(hit.toString());
    }
}
来源:https://stackoverflow.com/questions/39217924/parse-xml-using-java-stax-count-number-of-content-tags