SPARQL query returns duplicate rows and I don't understand why

本小妞迷上赌 提交于 2019-12-23 02:39:09

问题


I am using this query to get all programming languages and their details. Below is my test class; I run it from Java and it works. The problem I am facing is with some languages, for example the one named "ML (programming language)".

It is printed multiple times, each time with a different abstract and different "influenced" values. This happens not only for ML but for some other languages as well. I don't know whether there is a problem in my query or whether it is returning the data exactly as it is stored.

package io.naztech.dbpedia;

import java.io.ByteArrayOutputStream;
import java.util.List;

import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.apache.jena.query.ResultSetFormatter;
import org.apache.jena.sparql.engine.http.QueryEngineHTTP;
import org.junit.BeforeClass;
import org.junit.Test;

import io.naztech.talent.model.PediaTag;

/**
 * Ad-hoc test that sends one SPARQL SELECT query to the DBpedia Live endpoint
 * and dumps every returned row to standard output.
 *
 * <p>The query asks for every resource typed {@code dbo:ProgrammingLanguage}
 * together with its English label, English abstract, thumbnail, and the
 * concatenated labels of the languages it influenced / was influenced by,
 * its {@code owl:sameAs} links and its paradigms.
 */
public class testDataFetching {

    /** SPARQL endpoint the query is sent to. */
    private static final String ENDPOINT = "http://live.dbpedia.org/sparql";

    /**
     * Runs the query remotely, prints each solution field by field, and
     * finally prints the number of rows received.
     *
     * <p>The test makes no assertions; it requires network access to
     * {@link #ENDPOINT}.
     */
    @Test
    public void testAllDataFetching() {

        // NOTE(review): ?abstract is projected as a plain variable and is also
        // part of GROUP BY, so a resource that has several English abstracts
        // is returned as several rows (one group per distinct abstract).
        String q =  "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n"+
                    "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> \n"+
                    "PREFIX dbo: <http://dbpedia.org/ontology/> \n"+
                    "PREFIX dbp: <http://dbpedia.org/property/> \n"+
                    "PREFIX owl: <http://www.w3.org/2002/07/owl#> \n"+
                    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> \n" +
                    "PREFIX foaf: <http://xmlns.com/foaf/0.1/> \n" +
                    "PREFIX dc: <http://purl.org/dc/elements/1.1/> \n" +
                    "PREFIX : <http://dbpedia.org/resource/> \n" +
                    "PREFIX dbpedia2: <http://dbpedia.org/property/> \n" +
                    "PREFIX dbpedia: <http://dbpedia.org/> \n" +
                    "PREFIX skos: <http://www.w3.org/2004/02/skos/core#> \n" +

                    "SELECT DISTINCT ?pl ?pl_label ?abstract ?_thumbnail \n" +
                    "( Group_concat ( DISTINCT ?_influenced_label; separator= \", \")   AS ?influenced ) \n" +
                    "( Group_concat ( DISTINCT ?_influencedBy_label; separator= \", \") AS ?influencedBy ) \n" +
                    "( group_concat ( DISTINCT ?_sameAs; separator=\", \" ) AS ?sameAs ) \n" +
                    "( group_concat ( DISTINCT ?_paradigm_label; separator=\", \" ) AS ?paradigm ) \n" +

                    "WHERE  {\n" +

                    "       ?pl rdf:type dbo:ProgrammingLanguage .\n" +

                    "       OPTIONAL { ?pl dbo:abstract ?abstract .\n" +

                    "       FILTER ( LANG ( ?abstract ) = 'en' ) . } \n" +

                    "       ?pl rdfs:label ?pl_label .\n" +

                    "       FILTER ( LANG ( ?pl_label ) = 'en' ) .\n" +

                    "       OPTIONAL { ?pl dbo:influenced ?_influenced . \n" +

                    "       ?_influenced rdfs:label ?_influenced_label . \n" +

                    "       FILTER ( LANG ( ?_influenced_label ) = 'en' ) . } \n" +

                    "       OPTIONAL { ?pl dbo:influencedBy  ?_influencedBy . \n" +

                    "       ?_influencedBy  rdfs:label ?_influencedBy_label . \n" +

                    "       FILTER ( LANG ( ?_influencedBy_label ) = 'en' ) . } \n" +

                    "       OPTIONAL { ?pl owl:sameAs ?_sameAs . } \n" +

                    "       OPTIONAL { ?pl dbp:paradigm ?_paradigm . \n" +

                    "       ?_paradigm rdfs:label ?_paradigm_label . } \n" +

                    "       OPTIONAL { ?pl dbo:thumbnail ?_thumbnail . } \n" +

                    "       }"+

                    "       GROUP BY ?pl ?pl_label ?abstract ?_thumbnail ?influenced ?influencedBy ?sameAs ?paradigm";

        int count = 0;

        // try-with-resources closes the HTTP query execution even when
        // iterating the result set throws.
        try (QueryEngineHTTP queryEngine = new QueryEngineHTTP(ENDPOINT, q)) {
            ResultSet results = queryEngine.execSelect();

            while (results.hasNext()) {
                QuerySolution qs = results.next();

                // ?pl_label comes from a non-optional pattern, so it is
                // printed without a null check.
                System.out.println("NAME-->\n" + qs.get("pl_label").toString() + "\n");

                // The remaining variables may be unbound for a given row.
                printIfBound(qs, "influenced", "INFLUENCED");
                printIfBound(qs, "influencedBy", "INFLUENCED BY");
                printIfBound(qs, "abstract", "ABSTRACT");
                printIfBound(qs, "sameAs", "SAME AS");
                printIfBound(qs, "paradigm", "PARADIGM");
                printIfBound(qs, "_thumbnail", "THUMBNAIL");

                System.out.println("\n");

                count++;
            }
        }

        System.out.println(count);
    }

    /**
     * Prints {@code heading-->}, a newline, the value bound to {@code varName}
     * and a trailing newline; prints nothing when the variable is unbound in
     * this solution.
     *
     * @param qs      the current query solution
     * @param varName SPARQL variable name without the leading {@code ?}
     * @param heading text printed in front of {@code -->}
     */
    private static void printIfBound(QuerySolution qs, String varName, String heading) {
        Object value = qs.get(varName);
        if (value != null) {
            System.out.println(heading + "-->\n" + value.toString() + "\n");
        }
    }

}

回答1:


There are 3 English abstracts for that resource in the dataset; look at the DBpedia Live resource.

You can work around this by removing the ?abstract variable from the GROUP BY clause and instead using an aggregate function (SAMPLE, MIN, MAX) to pick one of the abstracts:

# One row per (?pl, ?pl_label, ?_thumbnail) group: the abstract is reduced to a
# single value with MIN, and the multi-valued properties are folded into
# delimiter-separated strings with GROUP_CONCAT.
# NOTE(review): no PREFIX declarations are given for dbo:, dbp:, rdfs:, owl: —
# presumably the endpoint predefines them; add PREFIX lines if it does not.
SELECT  ?pl ?pl_label 
        (MIN(?_abstract) AS ?abstract) # <- used MIN here to ensure stable result
        ?_thumbnail 
        (GROUP_CONCAT(DISTINCT ?_influenced_label ; separator='; ') AS ?influenced) 
        (GROUP_CONCAT(DISTINCT ?_influencedBy_label ; separator='; ') AS ?influencedBy) 
        (GROUP_CONCAT(DISTINCT ?_sameAs ; separator=', ') AS ?sameAs) 
        (GROUP_CONCAT(DISTINCT ?_paradigm_label ; separator=', ') AS ?paradigm)
WHERE
  { ?pl  a  dbo:ProgrammingLanguage ;
         rdfs:label  ?pl_label
    # Required pattern: only resources with an English label are returned.
    FILTER ( lang(?pl_label) = "en" )

    # Everything below is optional, so a language missing any of these
    # properties is still returned with that variable unbound.
    OPTIONAL
      { ?pl  dbo:abstract  ?_abstract
        FILTER ( lang(?_abstract) = "en" )
      }
    OPTIONAL
      # Property path: label of each resource reached via dbo:influenced.
      { ?pl       dbo:influenced/rdfs:label  ?_influenced_label
        FILTER ( lang(?_influenced_label) = "en" )
      }
    OPTIONAL
      # Property path: label of each resource reached via dbo:influencedBy.
      { ?pl       dbo:influencedBy/rdfs:label  ?_influencedBy_label
        FILTER ( lang(?_influencedBy_label) = "en" )
      }
    OPTIONAL
      { ?pl  owl:sameAs  ?_sameAs }
    OPTIONAL
      # Property path: label of each resource reached via dbp:paradigm.
      { ?pl       dbp:paradigm/rdfs:label  ?_paradigm_label
        FILTER ( lang(?_paradigm_label) = "en" )
      }
    OPTIONAL
      { ?pl  dbo:thumbnail  ?_thumbnail }
  }
# ?_abstract is deliberately absent here; it is aggregated in the SELECT clause.
GROUP BY ?pl ?pl_label ?_thumbnail

Update

I'll add the comment from @TallTed here; he's one of the people behind Virtuoso and knows it better than I do:

Be aware that while the recommended aggregate functions (MIN, MAX, SAMPLE) will get a value of the predicate, there is no assurance that this value will be the latest ingested to the dataset.



来源:https://stackoverflow.com/questions/55379468/sparql-query-getting-duplicate-or-not-i-dont-understant

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!