Package SPARQLWrapper :: Module Wrapper
[hide private]
[frames] | no frames]

Source Code for Module SPARQLWrapper.Wrapper

  1  # -*- coding: utf-8 -*- 
  2  # epydoc 
  3  # 
  4  """ 
  5  @var JSON: to be used to set the return format to JSON 
  6  @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default. 
  7  @var RDFXML: to be used to set the return format to RDF/XML explicitly. 
  8  @var TURTLE: to be used to set the return format to Turtle 
  9  @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle) 
 10  @var RDF: to be used to set the return RDF Graph 
 11  @var CSV: to be used to set the return format to CSV 
 12  @var TSV: to be used to set the return format to TSV 
 13  @var JSONLD: to be used to set the return format to JSON-LD 
 14   
 15  @var POST: to be used to set HTTP POST 
 16  @var GET: to be used to set HTTP GET. This is the default. 
 17   
 18  @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically. 
 19  @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically. 
 20  @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically. 
 21  @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically. 
 22   
 23  @var INSERT: to be used to set the query type to INSERT. 
 24  @var DELETE: to be used to set the query type to DELETE. 
 25  @var CREATE: to be used to set the query type to CREATE. 
 26  @var CLEAR: to be used to set the query type to CLEAR. 
 27  @var DROP: to be used to set the query type to DROP. 
 28  @var LOAD: to be used to set the query type to LOAD. 
 29  @var COPY: to be used to set the query type to COPY. 
 30  @var MOVE: to be used to set the query type to MOVE. 
 31  @var ADD: to be used to set the query type to ADD. 
 32   
 33   
 34  @var BASIC: BASIC HTTP Authentication method 
 35  @var DIGEST: DIGEST HTTP Authentication method 
 36   
 37  @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>} 
 38  @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>} 
 39  @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}. 
 40  @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>} 
 41  @requires: U{RDFLib<http://rdflib.net>} package. 
 42  """ 
 43   
 44  import urllib 
 45  import urllib2 
 46  from urllib2 import urlopen as urlopener  # don't change the name: tests override it 
 47  import socket 
 48  import base64 
 49  import re 
 50  import sys 
 51  import warnings 
 52   
 53  import json 
 54  from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict 
 55  from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError 
 56  from SPARQLWrapper import __agent__ 
 57   
 58  #  From <https://www.w3.org/TR/sparql11-protocol/#query-success> 
 59  #  The response body of a successful query operation with a 2XX response is either: 
 60  #  * SELECT and ASK: a SPARQL Results Document in XML, JSON, or CSV/TSV format. 
 61  #  * DESCRIBE and CONSTRUCT: an RDF graph serialized, for example, in the RDF/XML syntax, or an equivalent RDF graph serialization. 
 62  # 
 63  #  Possible parameter keys and values... 
 64  #  Examples: 
 65  #  - ClioPatria: the SWI-Prolog Semantic Web Server <http://cliopatria.swi-prolog.org/home> 
 66  #    * Parameter key: "format" 
 67  #    * Parameter value must have one of these values: "rdf+xml", "json", "csv", "application/sparql-results+xml" or "application/sparql-results+json". 
 68  # 
 69  #  - OpenLink Virtuoso  <http://virtuoso.openlinksw.com> 
 70  #    * Parameter key: "format" or "output" 
 71  #    * Parameter value, like directly: 
 72  #      "text/html" (HTML), "text/x-html+tr" (HTML (Faceted Browsing Links)), "application/vnd.ms-excel" 
 73  #      "application/sparql-results+xml" (XML), "application/sparql-results+json", (JSON) 
 74  #      "application/javascript" (Javascript), "text/turtle" (Turtle), "application/rdf+xml" (RDF/XML) 
 75  #      "text/plain" (N-Triples), "text/csv" (CSV), "text/tab-separated-values" (TSV) 
 76  #    * Parameter value, like indirectly: 
 77  #      "HTML" (alias text/html), "JSON" (alias application/sparql-results+json), "XML" (alias application/sparql-results+xml), "TURTLE" (alias text/rdf+n3), JavaScript (alias application/javascript) 
 78  #       See  <http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VOSSparqlProtocol#Additional HTTP Response Formats -- SELECT> 
 79  # 
 80  #  - Fuseki (formerly there was Joseki) <https://jena.apache.org/documentation/serving_data/> 
 81  #    * Parameter key: "format" or "output" 
 82  #      See Fuseki 1: https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/HttpNames.java 
 83  #      See Fuseki 2: https://github.com/apache/jena/blob/master/jena-arq/src/main/java/org/apache/jena/riot/web/HttpNames.java 
 84  #    * Fuseki 1 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift" 
 85  #      See <https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java> 
 86  #    * Fuseki 2 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift" 
 87  #      See <https://github.com/apache/jena/blob/master/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java> 
 88  # 
 89  #  - Eclipse RDF4J (formerly known as Sesame) <http://rdf4j.org/> 
 90  #    * Uses only content negotiation (no URL parameters). 
 91  #    * See <http://rdf4j.org/doc/the-rdf4j-server-rest-api/#The_QUERY_operation> 
 92  # 
 93  #  - RASQAL <http://librdf.org/rasqal/> 
 94  #    * Parameter key: "results" 
 95  #    * Uses roqet as RDF query utility 
 96  #      For variable bindings, the values of FORMAT vary upon what Rasqal supports but include simple 
 97  #      for a simple text format (default), xml for the SPARQL Query Results XML format, csv for SPARQL CSV, 
 98  #      tsv for SPARQL TSV, rdfxml and turtle for RDF syntax formats, and json for a JSON version of the results. 
 99  # 
100  #      For RDF graph results, the values of FORMAT are ntriples (N-Triples, default), 
101  #      rdfxml-abbrev (RDF/XML Abbreviated), rdfxml (RDF/XML), turtle (Turtle), 
102  #      json (RDF/JSON resource centric), json-triples (RDF/JSON triples) or 
103  #      rss-1.0 (RSS 1.0, also an RDF/XML syntax). 
104  # 
105  #      See <http://librdf.org/rasqal/roqet.html> 
106  # 
107  #  - Marklogic <http://marklogic.com> 
108  #    * Uses content negotiation (no URL parameters). 
109  #    * You can use following methods to query triples <https://docs.marklogic.com/guide/semantics/semantic-searches#chapter>: 
110  #      - SPARQL mode in Query Console. For details, see Querying Triples with SPARQL 
111  #      - XQuery using the semantics functions, and Search API, or a combination of XQuery and SPARQL. For details, see Querying Triples with XQuery or JavaScript. 
112  #      - HTTP via a SPARQL endpoint. For details, see Using Semantics with the REST Client API. 
113  #    * Formats are specified as part of the HTTP Accept headers of the REST request. <https://docs.marklogic.com/guide/semantics/REST#id_92428> 
114  #      - When you query the SPARQL endpoint with REST Client APIs, you can specify the result output format.  <https://docs.marklogic.com/guide/semantics/REST#id_54258> 
115  #        The response type format depends on the type of query and the MIME type in the HTTP Accept header. 
116  #      - This table describes the MIME types and Accept Header/Output formats (MIME type) for different types of SPARQL queries. See <https://docs.marklogic.com/guide/semantics/REST#id_54258> and <https://docs.marklogic.com/guide/semantics/loading#id_70682> 
117  #        SELECT "application/sparql-results+xml", "application/sparql-results+json", "text/html", "text/csv" 
118  #        CONSTRUCT or DESCRIBE "application/n-triples", "application/rdf+json", "application/rdf+xml", "text/turtle", "text/n3", "application/n-quads", "application/trig" 
119  # 
120  #  - AllegroGraph <https://franz.com/agraph/allegrograph/> 
121  #    * Uses only content negotiation (no URL parameters). 
122  #    * The server always looks at the Accept header of a request, and tries to 
123  #      generate a response in the format that the client asks for. If this fails, 
124  #      a 406 response is returned. When no Accept, or an Accept of */* is specified, 
125  #      the server prefers text/plain, in order to make it easy to explore the interface from a web browser. 
126  #    * Accept header expected (values returned by server when a wrong header is sent): 
127  #    ** SELECT 
128  #    *** application/sparql-results+xml 
129  #    *** application/sparql-results+json (and application/json) 
130  #    *** text/csv 
131  #    *** text/tab-separated-values 
132  #    *** OTHERS: application/sparql-results+ttl, text/integer, application/x-lisp-structured-expression, text/table, application/processed-csv, text/simple-csv, application/x-direct-upis 
133  # 
134  #    ** CONSTRUCT 
135  #    *** application/rdf+xml 
136  #    *** text/rdf+n3 
137  #    *** OTHERS: text/integer, application/json, text/plain, text/x-nquads, application/trix, text/table, application/x-direct-upis 
138  # 
139  #      See <https://franz.com/agraph/support/documentation/current/http-protocol.html> 
140   
141   
# Return formats that can be requested from an endpoint.
JSON = "json"
JSONLD = "json-ld"
XML = "xml"
TURTLE = "turtle"
N3 = "n3"
RDF = "rdf"
RDFXML = "rdf+xml"
CSV = "csv"
TSV = "tsv"
# JSONLD is only appended further down, when rdflib_jsonld can be imported.
_allowedFormats = [JSON, XML, TURTLE, N3, RDF, RDFXML, CSV, TSV]

# Possible HTTP methods
POST = "POST"
GET = "GET"
_allowedRequests = [POST, GET]

# Possible HTTP Authentication methods
BASIC = "BASIC"
DIGEST = "DIGEST"
_allowedAuth = [BASIC, DIGEST]

# Possible SPARQL/SPARUL query types (aka SPARQL Query forms)
SELECT = "SELECT"
CONSTRUCT = "CONSTRUCT"
ASK = "ASK"
DESCRIBE = "DESCRIBE"
INSERT = "INSERT"
DELETE = "DELETE"
CREATE = "CREATE"
CLEAR = "CLEAR"
DROP = "DROP"
LOAD = "LOAD"
COPY = "COPY"
MOVE = "MOVE"
ADD = "ADD"
_allowedQueryTypes = [
    SELECT, CONSTRUCT, ASK, DESCRIBE,
    INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD,
]

# Possible methods to perform requests
URLENCODED = "urlencoded"
POSTDIRECTLY = "postdirectly"
_REQUEST_METHODS = [URLENCODED, POSTDIRECTLY]

# MIME types that the local script is able to convert. Setting the return
# format alone is not enough because implementations disagree on the MIME
# type they send back (for example, Joseki used to answer with
# application/javascript instead of application/sparql-results+json), so
# several alternatives are listed per format. The situation for Turtle and N3
# is even less settled, which is why _RDF_N3 lists so many variants.
_SPARQL_DEFAULT = ["application/sparql-results+xml", "application/rdf+xml", "*/*"]
_SPARQL_XML = ["application/sparql-results+xml"]
_SPARQL_JSON = ["application/sparql-results+json", "text/javascript", "application/json"]
_RDF_XML = ["application/rdf+xml"]
_RDF_N3 = [
    "text/rdf+n3",
    "application/n-triples",
    "application/turtle",
    "application/n3",
    "text/n3",
    "text/turtle",
]
_RDF_JSONLD = ["application/x-json+ld", "application/ld+json"]
_CSV = ["text/csv"]
_TSV = ["text/tab-separated-values"]
_XML = ["application/xml"]
_ALL = ["*/*"]
_RDF_POSSIBLE = _RDF_XML + _RDF_N3 + _XML
_SPARQL_PARAMS = ["query"]

# JSON-LD is optional: it is enabled only if rdflib_jsonld is importable, and
# silently left disabled otherwise.
try:
    import rdflib_jsonld
except ImportError:
    pass
else:
    _allowedFormats.append(JSONLD)
    _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD

# The name of the request parameter that selects the output format is not
# standardized: Virtuoso uses "format", Joseki uses "output" and Rasqal seems
# to use "results". Endpoints are (hopefully) oblivious to parameters they do
# not understand, so all of these keys are repeated in the final URI.
_returnFormatSetting = ["format", "output", "results"]
221   
222  ####################################################################################################### 
223   
224   
225 -class SPARQLWrapper(object):
226 """ 227 Wrapper around an online access to a SPARQL Web entry point. 228 229 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc, 230 are retained from one query to the next (in other words, only the query string changes). The instance can also be 231 reset to its initial values using the L{resetQuery} method. 232 233 @cvar pattern: regular expression used to determine whether a query is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, or L{DESCRIBE}. 234 @type pattern: compiled regular expression (see the C{re} module of Python) 235 @ivar endpoint: SPARQL endpoint's URI 236 @type endpoint: string 237 @ivar updateEndpoint: SPARQL endpoint's URI for update operations (if it's a different one). Default is C{None} 238 @type updateEndpoint: string 239 @ivar agent: The User-Agent for the HTTP request header. 240 @type agent: string 241 @ivar _defaultGraph: URI for the default graph. Default is C{None}, the value can be set either via an L{explicit call<addParameter>}("default-graph-uri", uri) or as part of the query string. 242 @type _defaultGraph: string 243 @ivar user: The username of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}. 244 @type user: string 245 @ivar passwd: The password of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}. 246 @type passwd: string 247 @ivar http_auth: HTTP Authentication type. The default value is L{BASIC}. Possible values are L{BASIC} or L{DIGEST} 248 @type http_auth: string 249 @ivar onlyConneg: Option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters).The default value is L{False}. 
250 @type onlyConneg: boolean 251 """ 252 pattern = re.compile(r""" 253 ((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))* 254 (?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD)) 255 """, re.VERBOSE | re.IGNORECASE) 256 comments_pattern = re.compile(r"(^|\n)\s*#.*?\n") 257
def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
    """
    Create a wrapper bound to a SPARQL endpoint.

    @param endpoint: string of the SPARQL endpoint's URI
    @type endpoint: string
    @param updateEndpoint: string of the SPARQL endpoint's URI for update operations (if it's a
    different one). When omitted (or otherwise falsy), C{endpoint} is used for updates, too.
    @type updateEndpoint: string
    @param returnFormat: Default: L{XML}. No check is done against the query type: the value is
    simply sent to the endpoint, and it is up to the endpoint to react or not. A value that is not
    one of the allowed formats of this module is replaced by L{XML}. The value can also be changed
    later via L{setReturnFormat}.
    @type returnFormat: string
    @param defaultGraph: URI for the default graph. Default is None, the value can be set either via an
    L{explicit call<addDefaultGraph>} or as part of the query string.
    @type defaultGraph: string
    @param agent: The User-Agent for the HTTP request header.
    @type agent: string
    """
    self.endpoint = endpoint
    # Updates go to the query endpoint unless a separate one was supplied.
    self.updateEndpoint = updateEndpoint or endpoint
    self.agent = agent

    # No credentials until setCredentials() is called.
    self.user = None
    self.passwd = None
    self.http_auth = BASIC

    self._defaultGraph = defaultGraph
    self.onlyConneg = False  # Only Content Negotiation

    # Unknown formats silently fall back to XML.
    self._defaultReturnFormat = returnFormat if returnFormat in _allowedFormats else XML

    self.resetQuery()
296
def resetQuery(self):
    """Restore the per-query settings to their defaults.

    The extra parameters are cleared (the default graph given at construction time, if any, is
    added again), the return format goes back to the instance default, the HTTP method to L{GET},
    the request method to URL-encoded, the timeout to C{None} and the query to a generic SELECT.
    """
    self.parameters = {}
    default_graph = self._defaultGraph
    if default_graph:
        self.addParameter("default-graph-uri", default_graph)

    self.returnFormat = self._defaultReturnFormat
    self.method = GET
    self.setQuery("SELECT * WHERE{ ?s ?p ?o }")
    self.timeout = None
    self.requestMethod = URLENCODED
309
def setReturnFormat(self, format):
    """Set the return format. If not an allowed value, the setting is ignored (with a warning).

    @param format: Possible values are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF}, L{RDFXML}, L{CSV}, L{TSV}, L{JSONLD} (constants in this module). All other cases are ignored.
    @type format: string
    @raise ValueError: if L{JSONLD} is tried to set and the current instance does not support JSON-LD.
    """
    if format in _allowedFormats:
        self.returnFormat = format
    elif format == JSONLD:
        # JSONLD is only in _allowedFormats when rdflib_jsonld could be imported,
        # so point the user at the package that is actually probed for.
        raise ValueError("Current instance does not support JSON-LD; you might want to install the rdflib-jsonld package.")
    else:
        warnings.warn("Ignore format '%s'; current instance supports: %s." % (format, ", ".join(_allowedFormats)), SyntaxWarning)
323
def supportsReturnFormat(self, format):
    """Tell whether a return format is accepted by this module.

    @param format: One of the return format constants of this module, e.g. L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF}, L{RDFXML}, L{CSV} or L{TSV}.
    @type format: string
    @return: C{True} if the format is among the currently allowed formats, C{False} otherwise.
    @rtype: bool
    """
    supported = format in _allowedFormats
    return supported
333
def setTimeout(self, timeout):
    """Store the timeout (in seconds) to use when querying the endpoint.

    @param timeout: Timeout in seconds; the value is converted with C{int()}.
    @type timeout: int
    """
    seconds = int(timeout)
    self.timeout = seconds
341
def setOnlyConneg(self, onlyConneg):
    """Choose whether only HTTP Content Negotiation is used (i.e. no format parameters are added to the request).

    @param onlyConneg: True if only HTTP Content Negotiation is allowed; False if HTTP parameters are allowed as well.
    @type onlyConneg: bool
    """
    # The value is stored as given; no conversion to bool is done.
    self.onlyConneg = onlyConneg
349
def setRequestMethod(self, method):
    """Choose how query or update operations are sent: URL-encoded
    (C{SPARQLWrapper.URLENCODED}) or POST directly (C{SPARQLWrapper.POSTDIRECTLY}).
    Further details at U{http://www.w3.org/TR/sparql11-protocol/#query-operation}
    and U{http://www.w3.org/TR/sparql11-protocol/#update-operation}.

    @param method: Possible values are C{SPARQLWrapper.URLENCODED} (URL-encoded) or C{SPARQLWrapper.POSTDIRECTLY} (POST directly). Any other value is ignored with a warning.
    @type method: string
    """
    if method not in _REQUEST_METHODS:
        warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
        return
    self.requestMethod = method
364
def addDefaultGraph(self, uri):
    """
    Register a default graph URI as a C{default-graph-uri} request parameter.
    @param uri: URI of the graph
    @type uri: string
    @deprecated: use addParameter("default-graph-uri", uri) instead of this method
    """
    parameter_name = "default-graph-uri"
    self.addParameter(parameter_name, uri)
373
def addNamedGraph(self, uri):
    """
    Register a named graph URI as a C{named-graph-uri} request parameter.
    @param uri: URI of the graph
    @type uri: string
    @deprecated: use addParameter("named-graph-uri", uri) instead of this method
    """
    parameter_name = "named-graph-uri"
    self.addParameter(parameter_name, uri)
382
def addExtraURITag(self, key, value):
    """
    Add an extra key/value pair to the request; alias of L{SPARQLWrapper.addParameter}.
    E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
    virtuoso to retrieve graphs that are not stored in its local database.
    @param key: key of the query part
    @type key: string
    @param value: value of the query part
    @type value: string
    @deprecated: use addParameter(key, value) instead of this method
    """
    # The result of addParameter is deliberately not returned, as before.
    self.addParameter(name=key, value=value) if False else self.addParameter(key, value)
396
def addCustomParameter(self, name, value):
    """
    Replace the values of a parameter: any existing values are cleared first, then the new one is added.
    The method is kept for backwards compatibility.
    @param name: name
    @type name: string
    @param value: value
    @type value: string
    @return: Returns a boolean indicating if the adding has been accomplished.
    @rtype: bool
    @deprecated: use addParameter(name, value) instead of this method
    """
    # Drop whatever was stored under this name; the outcome of the clearing is irrelevant here.
    self.clearParameter(name)
    added = self.addParameter(name, value)
    return added
410
def addParameter(self, name, value):
    """
    Append a value to an extra request parameter; a parameter may hold several values.
    E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
    virtuoso to retrieve graphs that are not stored in its local database.
    Reserved protocol parameters (such as C{query}) cannot be set this way; the attempt is dismissed.
    @param name: name
    @type name: string
    @param value: value
    @type value: string
    @return: Returns a boolean indicating if the adding has been accomplished.
    @rtype: bool
    """
    if name in _SPARQL_PARAMS:
        return False

    # Create the value list on first use, then append to it.
    self.parameters.setdefault(name, []).append(value)
    return True
432
def clearParameter(self, name):
    """
    Remove all values stored for a concrete parameter.
    Reserved protocol parameters (such as C{query}) are never cleared.
    @param name: name
    @type name: string
    @return: Returns a boolean indicating if the clearing has been accomplished.
    @rtype: bool
    """
    if name in _SPARQL_PARAMS:
        return False
    if name not in self.parameters:
        # Nothing stored under this name.
        return False

    del self.parameters[name]
    return True
450
def setCredentials(self, user, passwd):
    """
    Store the credentials to use when querying the current endpoint.
    @param user: username
    @type user: string
    @param passwd: password
    @type passwd: string
    """
    self.user, self.passwd = user, passwd
461
def setHTTPAuth(self, auth):
    """
    Set the HTTP Authentication type. Possible values are L{BASIC} or L{DIGEST}; the comparison is case-insensitive.
    @param auth: auth type
    @type auth: string
    @raise TypeError: if C{auth} is not a string.
    @raise ValueError: if C{auth} is not one of the allowed authentication types.
    """
    if not isinstance(auth, str):
        raise TypeError('setHTTPAuth takes a string')

    normalized = auth.upper()
    if normalized not in _allowedAuth:
        valid_types = ", ".join(_allowedAuth)
        raise ValueError("Value should be one of {0}".format(valid_types))

    self.http_auth = normalized
475
def setQuery(self, query):
    """
    Set the SPARQL query text. Note: no check is done on the validity of the query
    (syntax or otherwise) by this module, except for testing the query type (SELECT,
    ASK, etc). Syntax and validity checking is done by the SPARQL service itself.
    @param query: query text, either a unicode string or a UTF-8 encoded byte string
    @type query: string
    @raise TypeError: if C{query} is neither a unicode string nor a byte string.
    @bug: #2320024
    """
    # One check for both interpreter lines, instead of comparing sys.version
    # as a string: C{bytes} is the byte-string type on Python 2 (where it is an
    # alias of C{str}) and on Python 3, and C{type(u"")} is the text type on both.
    text_type = type(u"")
    if isinstance(query, bytes):
        query = query.decode('utf-8')
    elif not isinstance(query, text_type):
        raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')

    self.queryString = query
    self.queryType = self._parseQueryType(query)
502
def _parseQueryType(self, query):
    """
    Internal method for parsing the SPARQL query and returning its type (ie, L{SELECT}, L{ASK}, etc).

    Comments are stripped first, then the class-level C{pattern} is searched for the query form keyword.
    If no known keyword is found, a warning is issued and L{SELECT} is returned. This is just to
    get all other methods running; such a query is erroneous and the SPARQL endpoint is expected
    to reject it.

    @param query: query text
    @type query: string
    @return: the type of SPARQL query (aka SPARQL query form)
    @rtype: string
    """
    try:
        if type(query) != str:
            # Non-ASCII characters are dropped; only the keywords matter here.
            query = query.encode('ascii', 'ignore')
        query = self._cleanComments(query)
        found = self.pattern.search(query)
        # found is None when nothing matches, which raises the AttributeError handled below.
        detected = found.group("queryType").upper()
    except AttributeError:
        warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
        detected = None

    if detected not in _allowedQueryTypes:
        warnings.warn("unknown query type '%s'" % detected, RuntimeWarning)
        return SELECT
    return detected
531
def setMethod(self, method):
    """Set the invocation method. By default, this is L{GET}, but can be set to L{POST}.
    @param method: should be either L{GET} or L{POST}. Other cases are ignored.
    @type method: string
    """
    if method in _allowedRequests:
        self.method = method
538
def setUseKeepAlive(self):
    """Make urllib2 use keep-alive by installing a C{keepalive.HTTPHandler} opener.

    Nothing is done if such a handler is already part of the installed opener. When the
    C{keepalive} module cannot be imported, a warning is issued and the call has no effect.
    """
    try:
        from keepalive import HTTPHandler

        installed = urllib2._opener
        if installed:
            for handler in installed.handlers:
                if isinstance(handler, HTTPHandler):
                    # already installed
                    return

        urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    except ImportError:
        warnings.warn("keepalive support not available, so the execution of this method has no effect")
555
def isSparqlUpdateRequest(self):
    """ Tell whether the current query type is one of the SPARQL Update forms.
    @return: Returns C{True} if SPARQLWrapper is configured for executing a SPARQL Update request
    @rtype: bool
    """
    update_forms = (INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD)
    return self.queryType in update_forms
562
def isSparqlQueryRequest(self):
    """ Tell whether the current query is a SPARQL Query request, i.e. anything that is not an update.
    @return: Returns C{True} if SPARQLWrapper is configured for executing a SPARQL Query request.
    @rtype: bool
    """
    is_update = self.isSparqlUpdateRequest()
    return not is_update
569
def _cleanComments(self, query):
    """ Internal method that strips single-line comments from a query (issues #32 and #77).
    Every match of the class-level C{comments_pattern} is replaced by two newlines.
    @param query: The query
    @type query: string
    @return: the query after all occurrences of single-line comments are removed.
    @rtype: string
    """
    replacement = "\n\n"
    return self.comments_pattern.sub(replacement, query)
578
def _getRequestEncodedParameters(self, query=None):
    """ Internal method that builds the URL-encoded parameter string of the request.
    @param query: optional 2-tuple C{(name, text)}, where name is "query" or "update" and text is the query string
    @type query: tuple
    @return: the C{key=value} pairs joined with C{&}
    @rtype: string
    """
    params = self.parameters.copy()

    if query and type(query) == tuple and len(query) == 2:
        # tuple ("query"/"update", queryString)
        key, text = query
        params[key] = [text]

    # The key that selects the output format is not standardized (Virtuoso uses
    # 'format', sparqler uses 'output'), so every known key is sent; endpoints are
    # (hopefully) oblivious to the parameters they do not understand.
    if not self.onlyConneg:
        for setting in _returnFormatSetting:
            format_values = [self.returnFormat]
            params[setting] = format_values
            # Per the original note: Virtuoso answers 406 when the Accept header and the
            # "output"/"format" value do not agree. It does not understand "tsv" or
            # "json-ld" as a value but does accept the mime type, so both are sent.
            if self.returnFormat in [TSV, JSONLD]:
                mime = self._getAcceptHeader()
                if "*/*" in mime:
                    mime = ""  # clear the value in case of "*/*"
                format_values.append(mime)

    encoded = []
    for param, values in params.items():
        for value in values:
            encoded.append("%s=%s" % (
                urllib.quote_plus(param.encode('UTF-8'), safe='/'),
                urllib.quote_plus(value.encode('UTF-8'), safe='/'),
            ))

    return '&'.join(encoded)
615
def _getAcceptHeader(self):
    """ Internal method for getting the HTTP Accept Header that matches the query type and the return format.

    When the return format makes no sense for the query form, C{*/*} is sent and a warning is issued.
    @return: the value for the HTTP C{Accept} header (comma separated mime types)
    @rtype: string
    @see: U{Hypertext Transfer Protocol -- HTTP/1.1 - Header Field Definitions<https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1>}
    """
    if self.queryType in [SELECT, ASK]:
        if self.returnFormat == XML:
            acceptHeader = ",".join(_SPARQL_XML)
        elif self.returnFormat == JSON:
            acceptHeader = ",".join(_SPARQL_JSON)
        elif self.returnFormat == CSV:
            # Allowed for SELECT and ASK (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success)
            # but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/)
            acceptHeader = ",".join(_CSV)
        elif self.returnFormat == TSV:
            # Same remark as for CSV.
            acceptHeader = ",".join(_TSV)
        else:
            acceptHeader = ",".join(_ALL)
            warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
    elif self.queryType in [INSERT, DELETE]:
        acceptHeader = "*/*"
    else:  # CONSTRUCT, DESCRIBE (and any other query type not handled above)
        if self.returnFormat == N3 or self.returnFormat == TURTLE:
            acceptHeader = ",".join(_RDF_N3)
        elif self.returnFormat == XML or self.returnFormat == RDFXML:
            # RDFXML asks for RDF/XML explicitly, so it maps to the same mime type
            # as XML here instead of falling through to '*/*' with a warning.
            acceptHeader = ",".join(_RDF_XML)
        elif self.returnFormat == JSONLD and JSONLD in _allowedFormats:
            acceptHeader = ",".join(_RDF_JSONLD)
        else:
            acceptHeader = ",".join(_ALL)
            warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
    return acceptHeader
645
def _createRequest(self):
    """Internal method that builds the HTTP request for the current query or update.

    Updates are sent to C{updateEndpoint}, always with a request body. Queries are sent to
    C{endpoint}, with a body when the method is L{POST} and in the URL otherwise.
    The User-Agent, Accept and (if credentials are set) authentication settings are added last.
    @return: request a C{urllib2.Request} object of the urllib2 Python library
    @raise NotImplementedError: if credentials are set and the authentication type is neither L{BASIC} nor L{DIGEST}.
    """
    if self.isSparqlUpdateRequest():
        # protocol details at http://www.w3.org/TR/sparql11-protocol/#update-operation
        uri = self.updateEndpoint
        operation = "update"
        direct_content_type = "application/sparql-update"
        if self.method != POST:
            warnings.warn("update operations MUST be done by POST")
        with_body = True
    else:
        # protocol details at http://www.w3.org/TR/sparql11-protocol/#query-operation
        uri = self.endpoint
        operation = "query"
        direct_content_type = "application/sparql-query"
        with_body = self.method == POST

    if not with_body:
        # GET: everything, including the query itself, travels in the URL
        request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters((operation, self.queryString)))
    elif self.requestMethod == POSTDIRECTLY:
        # The body is the raw query text; the other parameters stay in the URL.
        request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
        request.add_header("Content-Type", direct_content_type)
        request.data = self.queryString.encode('UTF-8')
    else:
        # URL-encoded: the body is a form holding the query and all parameters.
        request = urllib2.Request(uri)
        request.add_header("Content-Type", "application/x-www-form-urlencoded")
        request.data = self._getRequestEncodedParameters((operation, self.queryString)).encode('ascii')

    request.add_header("User-Agent", self.agent)
    request.add_header("Accept", self._getAcceptHeader())

    if self.user and self.passwd:
        if self.http_auth == BASIC:
            credentials = "%s:%s" % (self.user, self.passwd)
            token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
            request.add_header("Authorization", "Basic %s" % token)
        elif self.http_auth == DIGEST:
            realm = "SPARQL"
            pwd_mgr = urllib2.HTTPPasswordMgr()
            pwd_mgr.add_password(realm, uri, self.user, self.passwd)
            opener = urllib2.build_opener()
            opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr))
            # Digest authentication is handled by an opener installed through urllib2.
            urllib2.install_opener(opener)
        else:
            valid_types = ", ".join(_allowedAuth)
            raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types,
                                                                                        self.http_auth))

    return request
703
def _query(self):
    """Internal method to execute the query.

    Builds the request with C{_createRequest} and opens it with the
    module-level C{urlopener}. HTTP errors with status 400, 404 and 500 are
    translated into the wrapper's own exceptions (carrying the body of the
    error response); any other C{urllib2.HTTPError} is re-raised unchanged.

    @return: tuple with the raw response plus the expected format
    @raise QueryBadFormed: the endpoint answered with HTTP 400
    @raise EndPointNotFound: the endpoint answered with HTTP 404
    @raise EndPointInternalError: the endpoint answered with HTTP 500
    """
    if self.timeout:
        # NOTE: this sets the default timeout for every socket created
        # afterwards in this process, not only for this request.
        socket.setdefaulttimeout(self.timeout)
    request = self._createRequest()

    # Only the network call is guarded, so unrelated errors are not masked.
    try:
        response = urlopener(request)
    except urllib2.HTTPError as e:  # "as" form: valid on Python 2.6+ and 3.x
        if e.code == 400:
            raise QueryBadFormed(e.read())
        elif e.code == 404:
            raise EndPointNotFound(e.read())
        elif e.code == 500:
            raise EndPointInternalError(e.read())
        else:
            # bare raise keeps the original traceback
            raise
    return response, self.returnFormat
726
def query(self):
    """
    Run the query against the endpoint and wrap the outcome.

    An exception is raised when the URI is wrong or when the server answers with an HTTP error
    (which is also what happens for a syntactically incorrect query rejected by the SPARQL endpoint).
    These are the usual urllib2 exceptions, so they cover possible SPARQL errors as well.

    Be aware that not every pairing of return format and query type is meaningful. A SELECT query
    asking for Turtle makes no sense (a SELECT does not produce a Graph), and a CONSTRUCT query asking
    for JSON may be a problem because, at the moment, there is no accepted JSON serialization of RDF
    (let alone one implemented by SPARQL endpoints). For such requests the media type of the answer is
    unpredictable and varies between SPARQL endpoint implementations. (Endpoints usually fall back to
    one of the "meaningful" formats, but which one is up to the specific implementation.)

    @return: query result
    @rtype: L{QueryResult} instance
    """
    raw_result = self._query()
    return QueryResult(raw_result)
746
def queryAndConvert(self):
    """Convenience shortcut: run the query and hand back the converted result.
    @return: the converted query result. See the conversion methods for more details.
    """
    return self.query().convert()
753 754 ####################################################################################################### 755 756
class QueryResult(object):
    """
    Wrapper around a query result. Users should not create instances of this class, it is
    generated by a L{SPARQLWrapper.query} call. The results can be
    converted to various formats, or used directly.

    If used directly: the class gives access to the direct http request results
    L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
    return the URL of the resource retrieved and
    C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
    (see the urllib2 standard library module of Python).

    For convenience, these methods are also available on the instance. The C{__iter__} and
    C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
    common idiom::
     for l in obj : do_something_with_line(l)
    would work, too.

    @ivar response: the direct HTTP response; a file-like object, as returned by the C{urllib2.urlopen} library call.
    @ivar requestedFormat: The requested format. The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{RDF}, L{CSV}, L{TSV}, L{JSONLD}. It is C{None} when the instance was built from a bare response instead of a C{(response, format)} tuple.
    @type requestedFormat: string
    """
    def __init__(self, result):
        """
        @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format)
        """
        if isinstance(result, tuple):
            # Direct response, see class comments for details
            self.response = result[0]
            self.requestedFormat = result[1]
        else:
            self.response = result
            # No format information available; always define the attribute
            # so that convert() does not fail with AttributeError.
            self.requestedFormat = None

    def geturl(self):
        """Return the URL of the original call.
        @return: URL of the original call
        @rtype: string
        """
        return self.response.geturl()

    def info(self):
        """Return the meta-information of the HTTP result.
        @return: meta information of the HTTP result
        @rtype: dictionary
        """
        return KeyCaseInsensitiveDict(self.response.info())

    def __iter__(self):
        """Return an iterator object. This method is expected for the inclusion
        of the object in a standard C{for} loop.
        """
        return self.response.__iter__()

    def next(self):
        """Method for the standard iterator."""
        return self.response.next()

    def _convertJSON(self):
        """
        Convert a JSON result into a Python dict. This method can be overwritten in a subclass
        for a different conversion method.
        @return: converted result
        @rtype: Python dictionary
        """
        return json.loads(self.response.read().decode("utf-8"))

    def _convertXML(self):
        """
        Convert an XML result into a Python dom tree. This method can be overwritten in a
        subclass for a different conversion method.
        @return: converted result
        @rtype: PyXlib DOM node
        """
        from xml.dom.minidom import parse
        return parse(self.response)

    def _convertRDF(self):
        """
        Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten
        in a subclass for a different conversion method.
        @return: converted result
        @rtype: RDFLib C{Graph}
        """
        try:
            from rdflib.graph import ConjunctiveGraph
        except ImportError:
            from rdflib import ConjunctiveGraph
        retval = ConjunctiveGraph()
        # (DEPRECATED) this is a strange hack. If the publicID is not set, rdflib (or the underlying xml parser) makes a funny
        # (DEPRECATED) (and, as far as I could see, meaningless) error message...
        retval.load(self.response)  # (DEPRECATED) publicID=' ')
        return retval

    def _convertN3(self):
        """
        Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass
        for a different conversion method.
        @return: converted result
        @rtype: string
        """
        return self.response.read()

    def _convertCSV(self):
        """
        Convert a CSV result into a string. This method can be overwritten in a subclass
        for a different conversion method.
        @return: converted result
        @rtype: string
        """
        return self.response.read()

    def _convertTSV(self):
        """
        Convert a TSV result into a string. This method can be overwritten in a subclass
        for a different conversion method.
        @return: converted result
        @rtype: string
        """
        return self.response.read()

    def _convertJSONLD(self):
        """
        Convert a RDF JSON-LD result into an RDFLib triple store. This method can be overwritten
        in a subclass for a different conversion method.
        @return: converted result
        @rtype: RDFLib Graph
        """
        from rdflib import ConjunctiveGraph
        retval = ConjunctiveGraph()
        retval.load(self.response, format='json-ld')  # (DEPRECATED), publicID=' ')
        return retval

    def convert(self):
        """
        Encode the return value depending on the return format:
            - in the case of XML, a DOM top element is returned;
            - in the case of JSON, a simplejson conversion will return a dictionary;
            - in the case of RDF/XML, the value is converted via RDFLib into a C{Graph} instance;
            - in the case of RDF Turtle/N3, a string is returned;
            - in the case of CSV/TSV, a string is returned;
            - in the case of JSON-LD, the value is converted via RDFLib into a C{Graph} instance.
        In all other cases (unknown or missing Content-Type) the raw response body is returned.

        A C{RuntimeWarning} is issued when the returned Content-Type does not match the
        requested format, or when the Content-Type is not recognised.

        @return: the converted query result. See the conversion methods for more details.
        """
        def _content_type_in_list(real, expected):
            # substring match: the header may carry parameters such as "; charset=utf-8"
            return any(mime in real for mime in expected)

        def _validate_format(format_name, allowed, mime, requested):
            # nothing to compare against when no format was recorded
            if requested is not None and requested not in allowed:
                message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
                warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)

        # TODO. In order to compare properly, the requested QueryType (SPARQL Query Form) is needed. For instance, the unexpected N3 requested for a SELECT would return XML
        headers = self.info()
        if "content-type" in headers:
            ct = headers["content-type"]  # returned Content-Type value

            # (accepted mime types, label for the warning, compatible requested formats, converter)
            # The order matters: the first matching entry wins.
            handlers = (
                (_SPARQL_XML, "XML", [XML], self._convertXML),
                (_XML, "XML", [XML], self._convertXML),
                (_SPARQL_JSON, "JSON", [JSON], self._convertJSON),
                (_RDF_XML, "RDF/XML", [RDF, XML, RDFXML], self._convertRDF),
                (_RDF_N3, "N3", [N3, TURTLE], self._convertN3),
                (_CSV, "CSV", [CSV], self._convertCSV),
                (_TSV, "TSV", [TSV], self._convertTSV),
                (_RDF_JSONLD, "JSON(-LD)", [JSONLD, JSON], self._convertJSONLD),
            )
            for mimes, format_name, allowed, converter in handlers:
                if _content_type_in_list(ct, mimes):
                    _validate_format(format_name, allowed, ct, self.requestedFormat)
                    return converter()

            warnings.warn("unknown response content type '%s' returning raw response..." %(ct), RuntimeWarning)
        return self.response.read()

    def print_results(self, minWidth=None):
        """
        Print the result of a SELECT query (read as SPARQL JSON) as a text table
        on standard output: a header row with the variable names, a line of
        C{=} characters, then one row per solution. Variables that are unbound
        in a solution are shown as empty cells.

        @param minWidth: minimum width of every column; when omitted (or falsy)
            the default of the width computation is used
        @type minWidth: int
        """
        results = self._convertJSON()
        variables = results["head"]["vars"]
        if minWidth:
            width = self.__get_results_width(results, minWidth)
        else:
            width = self.__get_results_width(results)

        def _row(cells):
            # every cell is followed by " |", cells are separated by one space
            return " ".join(cell + " |" for cell in cells)

        # Each row is emitted with a single-argument print(...), which gives
        # the same output as a print statement on Python 2 and is valid on Python 3.
        print(_row([("?" + var).ljust(w) for var, w in zip(variables, width)]))
        print("=" * (sum(width) + 3 * len(width)))
        for binding in results["results"]["bindings"]:
            # a separate name per cell: reusing the row variable for the cell
            # text would break every column after the first one
            cells = [self.__get_prettyprint_string_sparql_var_result(binding.get(var)).ljust(w)
                     for var, w in zip(variables, width)]
            print(_row(cells))

    def __get_results_width(self, results, minWidth=2):
        """
        Compute the width of every column of the table printed by L{print_results}.

        @param results: SPARQL JSON result, already converted to a dictionary
        @param minWidth: minimum width of every column
        @return: one width per variable, in the order of C{results["head"]["vars"]}
        @rtype: list of int
        """
        variables = results["head"]["vars"]
        # "+ 1" accounts for the leading "?" of the header cell
        width = [max(minWidth, len(var) + 1) for var in variables]
        for binding in results["results"]["bindings"]:
            for index, var in enumerate(variables):
                text = self.__get_prettyprint_string_sparql_var_result(binding.get(var))
                width[index] = max(width[index], len(text))
        return width

    def __get_prettyprint_string_sparql_var_result(self, result):
        """
        Render one bound value as text: the value itself, followed by
        C{@lang} when a language tag is present and by C{ [datatype]} when a
        datatype is present.

        @param result: one entry of a binding (a dictionary with at least a
            C{"value"} key), or C{None} for an unbound variable
        @return: printable representation; the empty string for C{None}
        @rtype: string
        """
        if result is None:
            return ""
        value = result["value"]
        lang = result.get("xml:lang", None)
        datatype = result.get("datatype", None)
        if lang is not None:
            value += "@" + lang
        if datatype is not None:
            value += " [" + datatype + "]"
        return value
983