这里要读一个xml文件,并把数据写入csv。
python代码
python 3.8
def read_xml3():
    """Convert the BBC news XML file into a two-column CSV file.

    Reads ``D:/tmp/bbc_text/bbc-text.xml`` and writes one row per child of
    the root element to ``D:/tmp/bbc_text/bbc_text_python.csv``: the child's
    ``category`` attribute followed by its text content, preceded by a
    ``category,text`` header row.

    Raises:
        KeyError: if a child element has no ``category`` attribute.
    """
    # Local import so the function does not depend on the file's import block.
    import csv

    doc = etree.parse("D:/tmp/bbc_text/bbc-text.xml")
    root = doc.getroot()

    # newline="" is what the csv module asks for: it writes the row
    # terminators itself and must not have them translated a second time.
    with open("D:/tmp/bbc_text/bbc_text_python.csv", "w",
              encoding="utf-8", newline="") as f:
        # The csv writer quotes a field whenever it contains a comma, a quote
        # or a line break, and doubles embedded quotes. The previous
        # hand-written code doubled the quotes but never wrapped the field,
        # which produced rows that CSV readers could not parse.
        writer = csv.writer(f)
        writer.writerow(["category", "text"])
        # Iterating an element yields its direct children; getchildren() is
        # deprecated.
        for news_item_xml in root:
            # .text is None for an element without text content.
            writer.writerow([news_item_xml.attrib['category'],
                             news_item_xml.text or ""])
    # The with-block flushes and closes the file; no explicit call is needed.
C#代码
/// <summary>
/// One news record: a category label plus the article text.
/// </summary>
public class BbcNewsItem
{
    /// <summary>Category label of the news item.</summary>
    public string Category { get; set; }

    /// <summary>Article text of the news item.</summary>
    public string Text { get; set; }
}
/// <summary>
/// Converts the BBC news XML file into a CSV file with CsvHelper.
/// </summary>
public static class XmlReader
{
    /// <summary>
    /// Loads D:/tmp/bbc_text/bbc-text.xml, turns every child element of the
    /// document element into a <see cref="BbcNewsItem"/> (its "category"
    /// attribute and inner text), and writes the items, with a header
    /// record, to D:/tmp/bbc_text/bbc_text_cshape.csv.
    /// </summary>
    public static void Read()
    {
        XmlDocument xmlDoc = new XmlDocument();
        xmlDoc.Load("D:/tmp/bbc_text/bbc-text.xml");

        List<BbcNewsItem> newsItems = new List<BbcNewsItem>();
        foreach (XmlNode node in xmlDoc.DocumentElement.ChildNodes)
        {
            // ChildNodes may also hold comments or processing instructions;
            // a hard cast to XmlElement would throw InvalidCastException on
            // those, so skip anything that is not an element.
            XmlElement e = node as XmlElement;
            if (e == null)
            {
                continue;
            }

            newsItems.Add(new BbcNewsItem
            {
                Category = e.GetAttribute("category"),
                Text = e.InnerText
            });
        }

        // The using statements dispose csvWriter first and writer second,
        // which flushes and closes the file in the right order. The inner
        // writer must not be closed by hand while csvWriter still wraps it.
        using (var writer = new StreamWriter("D:/tmp/bbc_text/bbc_text_cshape.csv"))
        using (var csvWriter = new CsvWriter(writer, CultureInfo.InvariantCulture))
        {
            csvWriter.Configuration.HasHeaderRecord = true;
            csvWriter.Configuration.AutoMap<BbcNewsItem>();
            csvWriter.WriteRecords(newsItems);
        }
    }
}
测试后所用时间为
235.8ms ± 19.8
不过,我后来发现,我用的是.net core 3.1。因为.net core是跨平台的,所以,并不是windows的机器码。应该是MSIL
于是改成了.net framework 4.5。
57.6 ms ± 10.8
C++代码
C++ 14
#include <iostream>
#include <fstream>
#include <chrono>
#include "pugixml.hpp"
using namespace std;
/// @brief Replaces every occurrence of a substring inside a string, in place.
///
/// The scan moves left to right and resumes after each inserted replacement,
/// so text introduced by @p replaceStr is never matched again (replacing
/// "a" with "aa" terminates).
///
/// @param data        String to modify.
/// @param toSearch    Substring to look for. If empty, @p data is left
///                    unchanged.
/// @param replaceStr  Text substituted for each occurrence.
void findAndReplaceAll(std::string& data, std::string toSearch, std::string replaceStr)
{
    // An empty pattern matches at every position, including the end of the
    // string, so the loop below would keep inserting replaceStr forever.
    if (toSearch.empty())
        return;

    for (std::string::size_type pos = data.find(toSearch);
         pos != std::string::npos;
         pos = data.find(toSearch, pos + replaceStr.size()))
    {
        data.replace(pos, toSearch.size(), replaceStr);
    }
}
int main()
{
auto start = chrono::system_clock::now();
std::cout << "Hello World!\n";
pugi::xml_document doc;
pugi::xml_parse_result result = doc.load_file("D:/tmp/bbc_text/bbc-text.xml");
if (!result)
return -1;
ofstream csv_file;
csv_file.open("D:/tmp/bbc_text/bbc_text_cpp.csv");
csv_file << "category,text" << endl;
auto children = doc.child("bbc").children();
for (auto child : children) {
string category = child.attribute("category").value();
string text = child.text().as_string();
findAndReplaceAll(text, "\"", "\"\"");
text = "\"" + text + "\"";
csv_file << category << "," << text << endl;
}
csv_file.flush();
csv_file.close();
auto end = chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed_seconds).count();
std::cout << millis << "\n";
string c;
std::cin >>c;
}
C++的运行时间是34.8 ms ± 1.6
还有golang和java的代码,我放在github了:
https://github.com/EricWebsmith/lang_compare_xml
结论
语言 | 运行时间 |
---|---|
c++ | 34.8 ms ± 1.6 |
python | 38.5 ms ± 4.88 |
.net framework | 57.6 ms ± 10.8 |
golang | 171.5 ms ± 12.6 |
.net core | 235.8ms ± 19.8 |
java | 258.6 ms ± 15.0 |
.Net Core运行时间约是python的6倍(235.8 ms 对 38.5 ms)。
.Net Framework运行时间约是python的1.5倍(57.6 ms 对 38.5 ms)。
.Net Framework运行时间约是.Net Core的1/4(57.6 ms 对 235.8 ms)。
python确实比我想象的快了很多,我以为python应该垫底的。
不过,我还是不看好python开发应用程序。类型不安全。和c#,c++相比,编程的时候,自动提示很差。其实编程效率并不高。这种编程效率也低,运行速度也慢的语言,不适合用来做应用程序。
当然,谁都没有c++快。
来源:oschina
链接:https://my.oschina.net/u/4295105/blog/4867825