1
2
3
4 """
5 @var JSON: to be used to set the return format to JSON
6 @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default.
7 @var RDFXML: to be used to set the return format to RDF/XML explicitly.
8 @var TURTLE: to be used to set the return format to Turtle
9 @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle)
10 @var RDF: to be used to set the return RDF Graph
11 @var CSV: to be used to set the return format to CSV
12 @var TSV: to be used to set the return format to TSV
13 @var JSONLD: to be used to set the return format to JSON-LD
14
15 @var POST: to be used to set HTTP POST
16 @var GET: to be used to set HTTP GET. This is the default.
17
18 @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically.
19 @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically.
20 @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically.
21 @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically.
22
23 @var INSERT: to be used to set the query type to INSERT.
24 @var DELETE: to be used to set the query type to DELETE.
25 @var CREATE: to be used to set the query type to CREATE.
26 @var CLEAR: to be used to set the query type to CLEAR.
27 @var DROP: to be used to set the query type to DROP.
28 @var LOAD: to be used to set the query type to LOAD.
29 @var COPY: to be used to set the query type to COPY.
30 @var MOVE: to be used to set the query type to MOVE.
31 @var ADD: to be used to set the query type to ADD.
32
33
34 @var BASIC: BASIC HTTP Authentication method
35 @var DIGEST: DIGEST HTTP Authentication method
36
37 @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>}
38 @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>}
39 @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}.
40 @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>}
41 @requires: U{RDFLib<http://rdflib.net>} package.
42 """
43
44 import urllib
45 import urllib2
46 from urllib2 import urlopen as urlopener
47 import base64
48 import re
49 import sys
50 import warnings
51
52 import json
53 from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict
54 from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError, Unauthorized
55 from SPARQLWrapper import __agent__
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Return formats understood by the wrapper.
JSON = "json"
JSONLD = "json-ld"
XML = "xml"
TURTLE = "turtle"
N3 = "n3"
RDF = "rdf"
RDFXML = "rdf+xml"
CSV = "csv"
TSV = "tsv"
# JSONLD is not listed here; it is appended further down only when the
# optional rdflib_jsonld package can be imported.
_allowedFormats = [JSON, XML, TURTLE, N3, RDF, RDFXML, CSV, TSV]

# HTTP methods.
POST = "POST"
GET = "GET"
_allowedRequests = [POST, GET]

# HTTP authentication schemes.
BASIC = "BASIC"
DIGEST = "DIGEST"
_allowedAuth = [BASIC, DIGEST]

# SPARQL query forms (first four) and SPARQL Update operations (the rest).
SELECT = "SELECT"
CONSTRUCT = "CONSTRUCT"
ASK = "ASK"
DESCRIBE = "DESCRIBE"
INSERT = "INSERT"
DELETE = "DELETE"
CREATE = "CREATE"
CLEAR = "CLEAR"
DROP = "DROP"
LOAD = "LOAD"
COPY = "COPY"
MOVE = "MOVE"
ADD = "ADD"
_allowedQueryTypes = [
    SELECT, CONSTRUCT, ASK, DESCRIBE,
    INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD,
]

# How the query text is transported in the HTTP request.
URLENCODED = "urlencoded"
POSTDIRECTLY = "postdirectly"
_REQUEST_METHODS = [URLENCODED, POSTDIRECTLY]
184
185
186
187
188
189
190
191
192
# MIME types used to build Accept headers and to recognise response bodies.
_SPARQL_DEFAULT = ["application/sparql-results+xml", "application/rdf+xml", "*/*"]
_SPARQL_XML = ["application/sparql-results+xml"]
_SPARQL_JSON = ["application/sparql-results+json", "application/json", "text/javascript", "application/javascript"]
_RDF_XML = ["application/rdf+xml"]
_RDF_N3 = ["text/rdf+n3", "application/n-triples", "application/turtle", "application/n3", "text/n3", "text/turtle"]
_RDF_JSONLD = ["application/x-json+ld", "application/ld+json"]
_CSV = ["text/csv"]
_TSV = ["text/tab-separated-values"]
_XML = ["application/xml"]
_ALL = ["*/*"]
_RDF_POSSIBLE = _RDF_XML + _RDF_N3 + _XML

# Request parameter names reserved for the query itself; callers may not
# set or clear them through the generic parameter methods.
_SPARQL_PARAMS = ["query"]

# JSON-LD support is optional: enable it only when rdflib_jsonld is importable.
try:
    import rdflib_jsonld
    _allowedFormats.append(JSONLD)
    _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD
except ImportError:
    # Plugin not installed: JSON-LD stays out of the allowed formats.
    pass
214
215
216
217
218
219
220
# Request parameter names that each receive the requested return format
# when HTTP parameters are in use (i.e. not content negotiation only).
_returnFormatSetting = ["format", "output", "results"]
222
223
224
225
227 """
228 Wrapper around an online access to a SPARQL Web entry point.
229
230 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc,
231 are retained from one query to the next (in other words, only the query string changes). The instance can also be
232 reset to its initial values using the L{resetQuery} method.
233
234 @cvar pattern: regular expression used to determine whether a query is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, or L{DESCRIBE}.
235 @type pattern: compiled regular expression (see the C{re} module of Python)
236 @ivar endpoint: SPARQL endpoint's URI
237 @type endpoint: string
238 @ivar updateEndpoint: SPARQL endpoint's URI for update operations (if it's a different one). Default is C{None}
239 @type updateEndpoint: string
240 @ivar agent: The User-Agent for the HTTP request header.
241 @type agent: string
242 @ivar _defaultGraph: URI for the default graph. Default is C{None}, the value can be set either via an L{explicit call<addParameter>}("default-graph-uri", uri) or as part of the query string.
243 @type _defaultGraph: string
244 @ivar user: The username of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}.
245 @type user: string
246 @ivar passwd: The password of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}.
247 @type passwd: string
248 @ivar http_auth: HTTP Authentication type. The default value is L{BASIC}. Possible values are L{BASIC} or L{DIGEST}
249 @type http_auth: string
250 @ivar onlyConneg: Option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters).The default value is L{False}.
251 @type onlyConneg: boolean
252 @ivar customHttpHeaders: Custom HTTP Headers to be included in the request. Important: These headers override previous values (including C{Content-Type}, C{User-Agent}, C{Accept} and C{Authorization} if they are present). It is a dictionary where keys are the header field name and values are the header values.
253 @type customHttpHeaders: dict
254 """
# Detects the SPARQL query form / update operation keyword, skipping any
# leading BASE and PREFIX declarations. The prefix label is matched with
# ``.*`` (not ``.+``) so the default prefix ``PREFIX : <...>`` is consumed
# as a declaration; otherwise a keyword appearing inside its IRI (e.g.
# "load") would be reported as the query type.
pattern = re.compile(r"""
    ((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.*:\s*<.*?>)\s*))*
    (?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))
""", re.VERBOSE | re.IGNORECASE)
# Matches a whole-line ``#`` comment, including its terminating newline.
comments_pattern = re.compile(r"(^|\n)\s*#.*?\n")
260
def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
    """
    Class encapsulating a full SPARQL call.

    @param endpoint: string of the SPARQL endpoint's URI
    @type endpoint: string
    @param updateEndpoint: string of the SPARQL endpoint's URI for update operations (if it's a different one).
    When falsy, C{endpoint} is used for updates as well.
    @type updateEndpoint: string
    @param returnFormat: Default: L{XML}. Can be set to JSON or Turtle/N3.

    No local check is done against the query type: the value is simply sent to the endpoint. Eg, if the
    value is set to JSON and a construct query is issued, it is up to the endpoint to react or not.
    A value that is not one of the allowed formats is silently replaced by L{XML}.

    Possible values:
    L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDFXML}, L{CSV}, L{TSV} (constants in this module).
    @type returnFormat: string
    @param defaultGraph: URI for the default graph. Default is None, the value can be set either via an L{explicit call<addDefaultGraph>} or as part of the query string.
    @type defaultGraph: string
    @param agent: The User-Agent for the HTTP request header.
    @type agent: string
    """
    self.endpoint = endpoint
    self.updateEndpoint = updateEndpoint or endpoint
    self.agent = agent
    self.user = None
    self.passwd = None
    self.http_auth = BASIC
    self._defaultGraph = defaultGraph
    self.onlyConneg = False
    self.customHttpHeaders = {}

    # Unknown formats fall back to XML rather than raising.
    self._defaultReturnFormat = returnFormat if returnFormat in _allowedFormats else XML

    # Populates parameters, method, query text and the other per-query state.
    self.resetQuery()
300
def resetQuery(self):
    """Reset the query, ie, return format, method, query, default or named graph settings, etc,
    are reset to their default values.

    The extra parameters are cleared (the default graph given at construction time, if any, is
    added back), the method becomes L{GET}, the query becomes a generic SELECT, the timeout is
    removed and the request method becomes L{URLENCODED}.
    """
    self.parameters = {}
    if self._defaultGraph:
        self.addParameter("default-graph-uri", self._defaultGraph)
    self.returnFormat = self._defaultReturnFormat
    self.method = GET
    self.setQuery("""SELECT * WHERE{ ?s ?p ?o }""")
    self.timeout = None
    self.requestMethod = URLENCODED
313
314
328
338
340 """Set the timeout (in seconds) to use for querying the endpoint.
341
342 @param timeout: Timeout in seconds.
343 @type timeout: int
344 """
345 self.timeout = int(timeout)
346
348 """Set this option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters).
349 @since: 1.8.1
350
351 @param onlyConneg: True if only HTTP Content Negotiation is allowed; False is HTTP parameters are allowed also.
352 @type onlyConneg: bool
353 """
354 self.onlyConneg = onlyConneg
355
357 """Set the internal method to use to perform the request for query or
358 update operations, either URL-encoded (C{SPARQLWrapper.URLENCODED}) or
359 POST directly (C{SPARQLWrapper.POSTDIRECTLY}).
360 Further details at U{http://www.w3.org/TR/sparql11-protocol/#query-operation}
361 and U{http://www.w3.org/TR/sparql11-protocol/#update-operation}.
362
363 @param method: Possible values are C{SPARQLWrapper.URLENCODED} (URL-encoded) or C{SPARQLWrapper.POSTDIRECTLY} (POST directly). All other cases are ignored.
364 @type method: string
365 """
366 if method in _REQUEST_METHODS:
367 self.requestMethod = method
368 else:
369 warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
370
def addDefaultGraph(self, uri):
    """
    Add a default graph URI.

    Thin wrapper that forwards to C{addParameter("default-graph-uri", uri)}.

    @param uri: URI of the graph
    @type uri: string
    @deprecated: use addParameter("default-graph-uri", uri) instead of this method
    """
    self.addParameter("default-graph-uri", uri)
379
381 """
382 Add a named graph URI.
383 @param uri: URI of the graph
384 @type uri: string
385 @deprecated: use addParameter("named-graph-uri", uri) instead of this method
386 """
387 self.addParameter("named-graph-uri", uri)
388
390 """
391 Some SPARQL endpoints require extra key value pairs.
392 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
393 virtuoso to retrieve graphs that are not stored in its local database.
394 Alias of L{SPARQLWrapper.addParameter} method.
395 @param key: key of the query part
396 @type key: string
397 @param value: value of the query part
398 @type value: string
399 @deprecated: use addParameter(key, value) instead of this method
400 """
401 self.addParameter(key, value)
402
404 """
405 Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding.
406 @param name: name
407 @type name: string
408 @param value: value
409 @type value: string
410 @return: Returns a boolean indicating if the adding has been accomplished.
411 @rtype: bool
412 @deprecated: use addParameter(name, value) instead of this method
413 """
414 self.clearParameter(name)
415 return self.addParameter(name, value)
416
def addParameter(self, name, value):
    """
    Some SPARQL endpoints allow extra key value pairs.
    E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
    virtuoso to retrieve graphs that are not stored in its local database.
    If the param C{query} is tried to be set, this intent is dismissed.
    Values accumulate: adding the same name twice keeps both values.

    @param name: name
    @type name: string
    @param value: value
    @type value: string
    @return: Returns a boolean indicating if the adding has been accomplished.
    @rtype: bool
    """
    if name in _SPARQL_PARAMS:
        # Reserved parameter names may only be set by the wrapper itself.
        return False
    self.parameters.setdefault(name, []).append(value)
    return True
438
440 """
441 Add a custom HTTP header (this method can override all HTTP headers).
442 IMPORTANT: Take into acount that each previous value for the header field names
443 C{Content-Type}, C{User-Agent}, C{Accept} and C{Authorization} would be overriden
444 if the header field name is present as value of the parameter C{httpHeaderName}.
445 @since: 1.8.2
446
447 @param httpHeaderName: The header field name.
448 @type httpHeaderName: string
449 @param httpHeaderValue: The header field value.
450 @type httpHeaderValue: string
451 """
452 self.customHttpHeaders[httpHeaderName] = httpHeaderValue
453
455 """
456 Clear the values of a custom Http Header previously setted.
457 Returns a boolean indicating if the clearing has been accomplished.
458 @since: 1.8.2
459
460 @param httpHeaderName: name
461 @type httpHeaderName: string
462 @return: Returns a boolean indicating if the clearing has been accomplished.
463 @rtype: bool
464 """
465 try:
466 del self.customHttpHeaders[httpHeaderName]
467 return True
468 except KeyError:
469 return False
470
def clearParameter(self, name):
    """
    Clear the values of a concrete parameter.

    @param name: name
    @type name: string
    @return: Returns a boolean indicating if the clearing has been accomplished;
    C{False} for reserved names and for names that were never set.
    @rtype: bool
    """
    if name in _SPARQL_PARAMS:
        return False
    try:
        del self.parameters[name]
    except KeyError:
        return False
    return True
488
def setCredentials(self, user, passwd):
    """
    Set the credentials for querying the current endpoint.

    The values are only stored on the instance here.

    @param user: username
    @type user: string
    @param passwd: password
    @type passwd: string
    """
    self.user = user
    self.passwd = passwd
499
def setHTTPAuth(self, auth):
    """
    Set the HTTP Authentication type. Possible values are L{BASIC} or L{DIGEST}.
    The value is matched case-insensitively and stored upper-cased.

    @param auth: auth type
    @type auth: string
    @raise TypeError: If the C{auth} parameter is not a string.
    @raise ValueError: If the C{auth} parameter has not one of the valid values: L{BASIC} or L{DIGEST}.
    """
    if not isinstance(auth, str):
        raise TypeError('setHTTPAuth takes a string')

    normalized = auth.upper()
    if normalized not in _allowedAuth:
        valid_types = ", ".join(_allowedAuth)
        raise ValueError("Value should be one of {0}".format(valid_types))
    self.http_auth = normalized
515
def setQuery(self, query):
    """
    Set the SPARQL query text. Note: no check is done on the validity of the query
    (syntax or otherwise) by this module, except for testing the query type (SELECT,
    ASK, etc). Syntax and validity checking is done by the SPARQL service itself.

    Byte strings are decoded as UTF-8; the stored C{queryString} is always text.
    C{queryType} is recomputed from the new text.

    @param query: query text
    @type query: string
    @raise TypeError: If the C{query} parameter is not an unicode-string or utf-8 encoded byte-string.
    """
    # Compare the numeric major version: comparing sys.version as a string
    # is lexical and would misclassify a two-digit major version.
    if sys.version_info[0] < 3:
        text_type, bytes_type = unicode, str  # noqa: F821 (Python 2 only)
    else:
        text_type, bytes_type = str, bytes

    if isinstance(query, bytes_type):
        query = query.decode('utf-8')
    elif not isinstance(query, text_type):
        raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')

    self.queryString = query
    self.queryType = self._parseQueryType(query)
542
def _parseQueryType(self, query):
    """
    Internal method for parsing the SPARQL query and return its type (ie, L{SELECT}, L{ASK}, etc).

    Note that the method returns L{SELECT} if no known keyword is detected. This is just to get all
    other methods running; in fact, this means that the query is erroneous, and the SPARQL endpoint
    should raise an exception (via urllib) for such syntax error. A C{RuntimeWarning} is issued in
    that case.

    @param query: query text
    @type query: string
    @return: the type of SPARQL query (aka SPARQL query form)
    @rtype: string
    """
    try:
        if not isinstance(query, str):
            query = query.encode('ascii', 'ignore')
        query = self._cleanComments(query)
        # search() returns None when no keyword is present; the resulting
        # AttributeError on .group is the "not detected" signal.
        r_queryType = self.pattern.search(query).group("queryType").upper()
    except AttributeError:
        warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
        r_queryType = None

    if r_queryType in _allowedQueryTypes:
        return r_queryType

    warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning)
    return SELECT
571
573 """Set the invocation method. By default, this is L{GET}, but can be set to L{POST}.
574 @param method: should be either L{GET} or L{POST}. Other cases are ignored.
575 @type method: string
576 """
577 if method in _allowedRequests:
578 self.method = method
579
581 """Make urllib2 use keep-alive.
582 @raise ImportError: when could not be imported keepalive.HTTPHandler
583 """
584 try:
585 from keepalive import HTTPHandler
586
587 if urllib2._opener and any(isinstance(h, HTTPHandler) for h in urllib2._opener.handlers):
588
589 return
590
591 keepalive_handler = HTTPHandler()
592 opener = urllib2.build_opener(keepalive_handler)
593 urllib2.install_opener(opener)
594 except ImportError:
595 warnings.warn("keepalive support not available, so the execution of this method has no effect")
596
def isSparqlUpdateRequest(self):
    """ Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Update request,
    i.e. the detected query type is one of the update operations.

    @return: Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Update request
    @rtype: bool
    """
    update_operations = [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]
    return self.queryType in update_operations
603
605 """ Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Query request.
606 @return: Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Query request.
607 @rtype: bool
608 """
609 return not self.isSparqlUpdateRequest()
610
619
def _getRequestEncodedParameters(self, query=None):
    """ Internal method for getting the request encoded parameters.

    @param query: optional C{(parameter name, query text)} pair to add to the
    extra parameters; anything that is not a 2-tuple is ignored.
    @type query: tuple
    @return: the C{&}-joined, URL-quoted C{name=value} pairs
    @rtype: string
    """
    # Shallow copy: entries are replaced below, never mutated in place, so
    # self.parameters is left untouched.
    query_parameters = self.parameters.copy()

    if query and isinstance(query, tuple) and len(query) == 2:
        query_parameters[query[0]] = [query[1]]

    if not self.onlyConneg:
        # Endpoints disagree on the parameter name, so every known name is sent.
        for f in _returnFormatSetting:
            query_parameters[f] = [self.returnFormat]
            if self.returnFormat in [TSV, JSONLD]:
                # For these formats the MIME type is also sent as a value.
                acceptHeader = self._getAcceptHeader()
                if "*/*" in acceptHeader:
                    acceptHeader = ""
                query_parameters[f] += [acceptHeader]

    pairs = (
        "%s=%s" % (
            urllib.quote_plus(param.encode('UTF-8'), safe='/'),
            urllib.quote_plus(value.encode('UTF-8'), safe='/')
        )
        for param, values in query_parameters.items() for value in values
    )

    return '&'.join(pairs)
656
def _getAcceptHeader(self):
    """ Internal method for getting the HTTP Accept Header.

    The value depends on the query type and the return format. When the
    combination is not expected, C{*/*} is sent and a C{RuntimeWarning} is issued.

    @see: U{Hypertext Transfer Protocol -- HTTP/1.1 - Header Field Definitions<https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1>}
    @return: comma separated list of MIME types
    @rtype: string
    """
    if self.queryType in [INSERT, DELETE]:
        return "*/*"

    if self.queryType in [SELECT, ASK]:
        candidates = {XML: _SPARQL_XML, JSON: _SPARQL_JSON, CSV: _CSV, TSV: _TSV}
    else:
        # RDFXML is the explicit spelling of RDF/XML, so it negotiates the
        # same MIME type as XML does for graph-producing queries.
        candidates = {N3: _RDF_N3, TURTLE: _RDF_N3, XML: _RDF_XML, RDFXML: _RDF_XML}
        if JSONLD in _allowedFormats:
            candidates[JSONLD] = _RDF_JSONLD

    mime_types = candidates.get(self.returnFormat)
    if mime_types is None:
        mime_types = _ALL
        warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
    return ",".join(mime_types)
686
def _createRequest(self):
    """Internal method to create request according a HTTP method. Returns a
    C{urllib2.Request} object of the urllib2 Python library

    Update operations go to C{updateEndpoint} and always carry the operation in
    the request body; queries go to C{endpoint}, in the body for L{POST} or in the
    URL for L{GET}. Custom HTTP headers are applied last, so they override
    the headers set here.

    @raise NotImplementedError: If the C{HTTP authentication} method is not one of the valid values: L{BASIC} or L{DIGEST}.
    @return: request a C{urllib2.Request} object of the urllib2 Python library
    """
    def build_body_request(uri, direct_content_type, form_field):
        # Shared by updates and POSTed queries: either send the raw text with
        # its SPARQL media type, or a URL-encoded form with the text as a field.
        if self.requestMethod == POSTDIRECTLY:
            req = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
            req.add_header("Content-Type", direct_content_type)
            req.data = self.queryString.encode('UTF-8')
        else:
            req = urllib2.Request(uri)
            req.add_header("Content-Type", "application/x-www-form-urlencoded")
            req.data = self._getRequestEncodedParameters((form_field, self.queryString)).encode('ascii')
        return req

    if self.isSparqlUpdateRequest():
        # See http://www.w3.org/TR/sparql11-protocol/#update-operation
        uri = self.updateEndpoint
        if self.method != POST:
            # Only a warning: the request is still built with a body.
            warnings.warn("update operations MUST be done by POST")
        request = build_body_request(uri, "application/sparql-update", "update")
    else:
        # See http://www.w3.org/TR/sparql11-protocol/#query-operation
        uri = self.endpoint
        if self.method == POST:
            request = build_body_request(uri, "application/sparql-query", "query")
        else:
            request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters(("query", self.queryString)))

    request.add_header("User-Agent", self.agent)
    request.add_header("Accept", self._getAcceptHeader())

    if self.user and self.passwd:
        if self.http_auth == BASIC:
            credentials = "%s:%s" % (self.user, self.passwd)
            request.add_header("Authorization", "Basic %s" % base64.b64encode(credentials.encode('utf-8')).decode('utf-8'))
        elif self.http_auth == DIGEST:
            realm = "SPARQL"
            pwd_mgr = urllib2.HTTPPasswordMgr()
            pwd_mgr.add_password(realm, uri, self.user, self.passwd)
            opener = urllib2.build_opener()
            opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr))
            # Installed globally in urllib2, not only for this request.
            urllib2.install_opener(opener)
        else:
            valid_types = ", ".join(_allowedAuth)
            raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types,
                                                                                        self.http_auth))

    for customHttpHeader in self.customHttpHeaders:
        request.add_header(customHttpHeader, self.customHttpHeaders[customHttpHeader])

    return request
749
def _query(self):
    """Internal method to execute the query. Returns the output of the
    C{urllib2.urlopen} method of the standard Python library

    @return: tuples with the raw request plus the expected format.
    @raise QueryBadFormed: If the C{HTTP return code} is C{400}.
    @raise Unauthorized: If the C{HTTP return code} is C{401}.
    @raise EndPointNotFound: If the C{HTTP return code} is C{404}.
    @raise EndPointInternalError: If the C{HTTP return code} is C{500}.
    """
    request = self._createRequest()

    try:
        if self.timeout:
            response = urlopener(request, timeout=self.timeout)
        else:
            response = urlopener(request)
        return response, self.returnFormat
    except urllib2.HTTPError as e:
        error_class = {
            400: QueryBadFormed,
            401: Unauthorized,
            404: EndPointNotFound,
            500: EndPointInternalError,
        }.get(e.code)
        if error_class is None:
            # Any other status: propagate the HTTPError with its traceback.
            raise
        raise error_class(e.read())
779
def query(self):
    """
    Execute the query.
    Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
    case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint).
    The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too.

    Note that some combinations of return formats and query types may not make sense. For example,
    a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph). In such
    cases the returned media type of the result is unpredictable and may differ from one SPARQL endpoint
    implementation to the other. (Endpoints usually fall back to one of the "meaningful" formats, but it
    is up to the specific implementation to choose which one that is.)

    @return: query result
    @rtype: L{QueryResult} instance
    """
    return QueryResult(self._query())
799
801 """Macro like method: issue a query and return the converted results.
802 @return: the converted query result. See the conversion methods for more details.
803 """
804 res = self.query()
805 return res.convert()
806
807
808
809
811 """
812 Wrapper around a query result. Users should not create instances of this class, it is
813 generated by a L{SPARQLWrapper.query} call. The results can be
814 converted to various formats, or used directly.
815
816 If used directly: the class gives access to the direct http request results
817 L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
818 return the URL of the resource retrieved and
819 C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
820 (see the urllib2 standard library module of Python).
821
822 For convenience, these methods are also available on the instance. The C{__iter__} and
823 C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
824 common idiom::
825 for l in obj : do_something_with_line(l)
826 would work, too.
827
828 @ivar response: the direct HTTP response; a file-like object, as return by the C{urllib2.urlopen} library call.
829 @ivar requestedFormat: The requested format. The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{RDF}, L{CSV}, L{TSV}, L{JSONLD}.
830 @type requestedFormat: string
831
832 """
def __init__(self, result):
    """
    @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format)
    """
    if isinstance(result, tuple):
        self.response = result[0]
        self.requestedFormat = result[1]
    else:
        # Direct response, see class comments for details.
        self.response = result
        # Always define the attribute so later reads cannot raise
        # AttributeError when only a bare response was supplied.
        self.requestedFormat = None
843
def geturl(self):
    """Return the URL of the original call, as reported by the wrapped response.

    @return: URL of the original call
    @rtype: string
    """
    return self.response.geturl()
850
def info(self):
    """Return the meta-information of the HTTP result.

    The response's C{info()} value is wrapped in a C{KeyCaseInsensitiveDict}.

    @return: meta information of the HTTP result
    @rtype: dict
    """
    return KeyCaseInsensitiveDict(self.response.info())
857
def __iter__(self):
    """Return an iterator object. This method is expected for the inclusion
    of the object in a standard C{for} loop. Iteration is delegated to the
    wrapped response.
    """
    return iter(self.response)
863
def next(self):
    """Method for the standard iterator: return the next item of the wrapped response."""
    # Python 2 file-like objects expose next(); Python 3 ones only __next__().
    advance = getattr(self.response, "next", None) or self.response.__next__
    return advance()
867
def _convertJSON(self):
    """
    Convert a JSON result into a Python dict. This method can be overwritten in a subclass
    for a different conversion method. The response body is read fully and decoded as UTF-8.

    @return: converted result
    @rtype: dict
    """
    payload = self.response.read().decode("utf-8")
    return json.loads(payload)
876
def _convertXML(self):
    """
    Convert an XML result into a Python dom tree. This method can be overwritten in a
    subclass for a different conversion method. Parsing is done by C{xml.dom.minidom}.

    @return: converted result
    @rtype: C{xml.dom.minidom} document
    """
    from xml.dom.minidom import parse
    return parse(self.response)
886
def _convertRDF(self):
    """
    Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten
    in a subclass for a different conversion method.

    @return: converted result
    @rtype: RDFLib C{Graph}
    """
    try:
        from rdflib.graph import ConjunctiveGraph
    except ImportError:
        from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # parse() with an explicit RDF/XML format replaces the former load()
    # call, which defaulted to the same format but was removed in rdflib 6.
    retval.parse(self.response, format="xml")
    return retval
903
def _convertN3(self):
    """
    Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass
    for a different conversion method. The body is returned exactly as read from the response.

    @return: converted result
    @rtype: string
    """
    return self.response.read()
912
def _convertCSV(self):
    """
    Convert a CSV result into a string. This method can be overwritten in a subclass
    for a different conversion method. The body is returned exactly as read from the response.

    @return: converted result
    @rtype: string
    """
    return self.response.read()
921
def _convertTSV(self):
    """
    Convert a TSV result into a string. This method can be overwritten in a subclass
    for a different conversion method. The body is returned exactly as read from the response.

    @return: converted result
    @rtype: string
    """
    return self.response.read()
930
def _convertJSONLD(self):
    """
    Convert a RDF JSON-LD result into an RDFLib triple store. This method can be overwritten
    in a subclass for a different conversion method.

    @return: converted result
    @rtype: RDFLib Graph
    """
    from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # parse() replaces the former load() call, which was removed in rdflib 6.
    retval.parse(self.response, format='json-ld')
    return retval
942
def convert(self):
    """
    Encode the return value depending on the return format:
        - in the case of XML, a DOM top element is returned;
        - in the case of JSON, a json conversion will return a dictionary;
        - in the case of RDF/XML, the value is converted via RDFLib into a C{Graph} instance;
        - in the case of RDF Turtle/N3, a string is returned;
        - in the case of CSV/TSV, a string is returned.
    In all other cases the input is simply returned.

    The converter is chosen from the response's C{Content-Type}; a C{RuntimeWarning} is
    issued when that type does not correspond to the requested format, or is unknown.

    @return: the converted query result. See the conversion methods for more details.
    """
    def _content_type_in_list(real, expected):
        return any(mime in real for mime in expected)

    def _validate_format(format_name, allowed, mime, requested):
        # Nothing to compare against when no format was recorded.
        if requested is not None and requested not in allowed:
            message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
            warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)

    headers = self.info()
    if "content-type" in headers:
        ct = headers["content-type"]
        requested = getattr(self, "requestedFormat", None)

        # Order matters: the first entry whose MIME type occurs in the
        # Content-Type wins.
        dispatch = (
            (_SPARQL_XML, "XML", [XML], self._convertXML),
            (_XML, "XML", [XML], self._convertXML),
            (_SPARQL_JSON, "JSON", [JSON], self._convertJSON),
            (_RDF_XML, "RDF/XML", [RDF, XML, RDFXML], self._convertRDF),
            (_RDF_N3, "N3", [N3, TURTLE], self._convertN3),
            (_CSV, "CSV", [CSV], self._convertCSV),
            (_TSV, "TSV", [TSV], self._convertTSV),
            (_RDF_JSONLD, "JSON(-LD)", [JSONLD, JSON], self._convertJSONLD),
        )
        for mimes, format_name, allowed, converter in dispatch:
            if _content_type_in_list(ct, mimes):
                _validate_format(format_name, allowed, ct, requested)
                return converter()

        warnings.warn("unknown response content type '%s' returning raw response..." %(ct), RuntimeWarning)
    return self.response.read()
994
996 results = self._convertJSON()
997 if minWidth:
998 width = self.__get_results_width(results, minWidth)
999 else:
1000 width = self.__get_results_width(results)
1001 index = 0
1002 for var in results["head"]["vars"]:
1003 print ("?" + var).ljust(width[index]), "|",
1004 index += 1
1005 print
1006 print "=" * (sum(width) + 3 * len(width))
1007 for result in results["results"]["bindings"]:
1008 index = 0
1009 for var in results["head"]["vars"]:
1010 result = self.__get_prettyprint_string_sparql_var_result(result[var])
1011 print result.ljust(width[index]), "|",
1012 index += 1
1013 print
1014
def __get_results_width(self, results, minWidth=2):
    """Compute one column width per variable of a SPARQL JSON result.

    Each width is the largest of C{minWidth}, the variable name length plus
    one, and the length of every pretty-printed value bound to that variable.

    @param results: result in SPARQL JSON layout (C{head.vars}, C{results.bindings})
    @type results: dict
    @param minWidth: minimum width of every column
    @type minWidth: int
    @return: column widths, in the order of C{head.vars}
    @rtype: list
    """
    # NOTE(review): the default for minWidth is not visible in this listing;
    # a caller omits the argument, so one must exist -- confirm the value.
    variables = results["head"]["vars"]
    width = [max(minWidth, len(var) + 1) for var in variables]
    for binding in results["results"]["bindings"]:
        for index, var in enumerate(variables):
            if var not in binding:
                # Unbound variable in this row: contributes no width.
                continue
            # Use a separate name: rebinding the row variable to the
            # rendered string broke the lookup of the next variable.
            cell = self.__get_prettyprint_string_sparql_var_result(binding[var])
            width[index] = max(width[index], len(cell))
    return width
1026
def __get_prettyprint_string_sparql_var_result(self, result):
    """Render one SPARQL JSON binding value as text.

    The language tag, if present, is appended as C{@lang}; the datatype, if
    present, is appended as C{ [datatype]}.

    @param result: a single binding value (keys C{value}, optional C{xml:lang} and C{datatype})
    @type result: dict
    @return: the printable text
    @rtype: string
    """
    value = result["value"]
    lang = result.get("xml:lang")
    datatype = result.get("datatype")
    if lang is not None:
        value += "@" + lang
    if datatype is not None:
        value += " [" + datatype + "]"
    return value
1036