libdap  Updated for version 3.20.7
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <fstream>
45 #include <iterator>
46 #include <cstdlib>
47 #include <cstring>
48 #include <cerrno>
49 
50 //#define DODS_DEBUG2
51 //#define HTTP_TRACE
52 //#define DODS_DEBUG
53 
54 #undef USE_GETENV
55 
56 
57 #include "debug.h"
58 #include "mime_util.h"
59 #include "media_types.h"
60 #include "GNURegex.h"
61 #include "HTTPCache.h"
62 #include "HTTPConnect.h"
63 #include "RCReader.h"
64 #include "HTTPResponse.h"
65 #include "HTTPCacheResponse.h"
66 
67 using namespace std;
68 
69 namespace libdap {
70 
// These global variables are not MT-Safe, but I'm leaving them as is because
// they are used only for debugging (set them in a debugger like gdb or ddd).
// They are not static because I think that many debuggers cannot access
// static variables. 08/07/02 jhrg

// Set this to 1 to turn on libcurl's verbose mode (for debugging).
// When non-zero, www_lib_init() enables CURLOPT_VERBOSE and installs
// curl_debug() as libcurl's debug callback.
int www_trace = 0;

// Set this to 1 to turn on libcurl's VERY verbose mode.
// curl_debug() prints the data-in/data-out (and unclassified) messages only
// when this is non-zero.
int www_trace_extensive = 0;

// Keep the temporary files; useful for debugging.
// NOTE(review): not read in the visible part of this file; presumably
// consulted by the code that removes response temp files -- confirm.
int dods_keep_temps = 0;
84 
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the HTTP 4xx status codes, indexed by (status - CLIENT_ERR_MIN).
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        // Adjacent literals are used here instead of backslash-newline inside
        // one literal so that source indentation does not leak into the text.
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention.\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the HTTP 5xx status codes, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a human readable message.

    @param status An HTTP status code.
    @return The message from the client (400-417) or server (500-505) table
    above; any other value yields a generic "Unknown Error" message. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
135 
136 static ObjectType
137 determine_object_type(const string &header_value)
138 {
139  // DAP4 Data: application/vnd.opendap.dap4.data
140  // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
141 
142  string::size_type plus = header_value.find('+');
143  string base_type;
144  string type_extension = "";
145  if (plus != string::npos) {
146  base_type= header_value.substr(0, plus);
147  type_extension = header_value.substr(plus+1);
148  }
149  else
150  base_type = header_value;
151 
152  if (base_type == DMR_Content_Type
153  || (base_type.find("application/") != string::npos
154  && base_type.find("dap4.dataset-metadata") != string::npos)) {
155  if (type_extension == "xml")
156  return dap4_dmr;
157  else
158  return unknown_type;
159  }
160  else if (base_type == DAP4_DATA_Content_Type
161  || (base_type.find("application/") != string::npos
162  && base_type.find("dap4.data") != string::npos)) {
163  return dap4_data;
164  }
165  else if (header_value.find("text/html") != string::npos) {
166  return web_error;
167  }
168  else
169  return unknown_type;
170 }
171 
176 class ParseHeader : public unary_function<const string &, void>
177 {
178  ObjectType type; // What type of object is in the stream?
179  string server; // Server's version string.
180  string protocol; // Server's protocol version.
181  string location; // Url returned by server
182 
183 public:
184  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
185  { }
186 
187  void operator()(const string &line)
188  {
189  string name, value;
190  parse_mime_header(line, name, value);
191 
192  DBG2(cerr << name << ": " << value << endl);
193 
194  // Content-Type is used to determine the content of DAP4 responses, but allow the
195  // Content-Description header to override CT o preserve operation with DAP2 servers.
196  // jhrg 11/12/13
197  if (type == unknown_type && name == "content-type") {
198  type = determine_object_type(value); // see above
199  }
200  if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
201  type = get_description_type(value); // defined in mime_util.cc
202  }
203  // The second test (== "dods/0.0") tests if xopendap-server has already
204  // been seen. If so, use that header in preference to the old
205  // XDODS-Server header. jhrg 2/7/06
206  else if (name == "xdods-server" && server == "dods/0.0") {
207  server = value;
208  }
209  else if (name == "xopendap-server") {
210  server = value;
211  }
212  else if (name == "xdap") {
213  protocol = value;
214  }
215  else if (server == "dods/0.0" && name == "server") {
216  server = value;
217  }
218  else if (name == "location") {
219  location = value;
220  }
221  }
222 
223  ObjectType get_object_type()
224  {
225  return type;
226  }
227 
228  string get_server()
229  {
230  return server;
231  }
232 
233  string get_protocol()
234  {
235  return protocol;
236  }
237 
238  string get_location() {
239  return location;
240  }
241 };
242 
/** libcurl header callback (CURLOPT_HEADERFUNCTION): store one raw response
    header line in a vector<string>.

    Empty lines and HTTP status lines (lines that begin with "HTTP/") are not
    stored. A header whose value merely contains the text "HTTP" (e.g.
    "Server: Microsoft-HTTPAPI/2.0") is kept.

    @param ptr Start of the header line; not NUL terminated.
    @param size Size of one element.
    @param nmemb Number of elements; the line is size * nmemb bytes long.
    @param resp_hdrs A vector<string> * where the line is appended. If null,
    the line is discarded.
    @return size * nmemb, i.e. every byte is always reported as consumed so
    that libcurl continues the transfer. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);

    const size_t total = size * nmemb;
    std::vector<std::string> *hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);
    if (!hdrs || !ptr || total == 0)
        return total;

    // Grab the header, minus the trailing newline. Or \r\n pair. Only strip
    // characters that really are line terminators, and never index before
    // the start of the buffer.
    const char *data = static_cast<const char *>(ptr);
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status lines
    if (!complete_line.empty() && complete_line.compare(0, 5, "HTTP/") != 0) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
279 
281 static int
282 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
283 {
284  string message(msg, size);
285 
286  switch (info) {
287  case CURLINFO_TEXT:
288  cerr << "Text: " << message; break;
289  case CURLINFO_HEADER_IN:
290  cerr << "Header in: " << message; break;
291  case CURLINFO_HEADER_OUT:
292  cerr << "Header out: " << message; break;
293  case CURLINFO_DATA_IN:
294  if (www_trace_extensive)
295  cerr << "Data in: " << message; break;
296  case CURLINFO_DATA_OUT:
297  if (www_trace_extensive)
298  cerr << "Data out: " << message; break;
299  case CURLINFO_END:
300  cerr << "End: " << message; break;
301 #ifdef CURLINFO_SSL_DATA_IN
302  case CURLINFO_SSL_DATA_IN:
303  cerr << "SSL Data in: " << message; break;
304 #endif
305 #ifdef CURLINFO_SSL_DATA_OUT
306  case CURLINFO_SSL_DATA_OUT:
307  cerr << "SSL Data out: " << message; break;
308 #endif
309  default:
310  if (www_trace_extensive)
311  cerr << "Curl info: " << message; break;
312  }
313  return 0;
314 }
315 
319 void
320 HTTPConnect::www_lib_init()
321 {
322  curl_global_init(CURL_GLOBAL_DEFAULT);
323 
324  d_curl = curl_easy_init();
325  if (!d_curl)
326  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
327 
328  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
329 
330  curl_easy_setopt(d_curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // enables TLSv1.2 / TLSv1.3 version only
331 
332  // Now set options that will remain constant for the duration of this
333  // CURL object.
334 
335  // Set the proxy host.
336  if (!d_rcr->get_proxy_server_host().empty()) {
337  DBG(cerr << "Setting up a proxy server." << endl);
338  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
339  << endl);
340  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
341  << endl);
342  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
343  << endl);
344  curl_easy_setopt(d_curl, CURLOPT_PROXY,
345  d_rcr->get_proxy_server_host().c_str());
346  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
347  d_rcr->get_proxy_server_port());
348 
349  // As of 4/21/08 only NTLM, Digest and Basic work.
350 #ifdef CURLOPT_PROXYAUTH
351  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
352 #endif
353 
354  // Password might not be required. 06/21/04 jhrg
355  if (!d_rcr->get_proxy_server_userpw().empty())
356  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
357  d_rcr->get_proxy_server_userpw().c_str());
358  }
359 
360  // We have to set FailOnError to false for any of the non-Basic
361  // authentication schemes to work. 07/28/03 jhrg
362  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
363 
364  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
365  // choosing the the 'safest' one supported by the server.
366  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
367  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
368 
369  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
370  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
371  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
372  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
373  // param of save_raw_http_headers to a vector<string> object.
374 
375  // Follow 302 (redirect) responses
376  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
377  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
378 
379  // If the user turns off SSL validation...
380  if (d_rcr->get_validate_ssl() == 0) {
381  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
382  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
383  }
384 
385  // Set libcurl to use netrc to access data behind URS auth.
386  // libcurl will use the provided pathname for the ~/.netrc info. 08/23/19 kln
387  curl_easy_setopt(d_curl, CURLOPT_NETRC, 1);
388 
389  // Look to see if cookies are turned on in the .dodsrc file. If so,
390  // activate here. We honor 'session cookies' (cookies without an
391  // expiration date) here so that session-based SSO systems will work as
392  // expected.
393  if (!d_cookie_jar.empty()) {
394  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
395  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
396  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
397  }
398 
399  if (www_trace) {
400  cerr << "Curl version: " << curl_version() << endl;
401  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
402  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
403  }
404 }
405 
409 class BuildHeaders : public unary_function<const string &, void>
410 {
411  struct curl_slist *d_cl;
412 
413 public:
414  BuildHeaders() : d_cl(0)
415  {}
416 
417  void operator()(const string &header)
418  {
419  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
420  << endl);
421  d_cl = curl_slist_append(d_cl, header.c_str());
422  }
423 
424  struct curl_slist *get_headers()
425  {
426  return d_cl;
427  }
428 };
429 
444 long
445 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
446 {
447  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
448 
449 #ifdef WIN32
450  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
451  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
452  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
453  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
454  // this issue is that one should not pass a FILE * to a windows DLL. Close
455  // inspection of libcurl yields that their default write function when using
456  // the CURLOPT_WRITEDATA is just "fwrite".
457  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
458  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
459 #else
460  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
461 #endif
462 
463  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
464  ostream_iterator<string>(cerr, "\n")));
465 
466  BuildHeaders req_hdrs;
467  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
468  req_hdrs);
469  if (headers)
470  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
471 
472  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
473 
474  // Turn off the proxy for this URL?
475  bool temporary_proxy = false;
476  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
477  DBG(cerr << "Suppress proxy for url: " << url << endl);
478  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
479  }
480 
481  string::size_type at_sign = url.find('@');
482  // Assume username:password present *and* assume it's an HTTP URL; it *is*
483  // HTTPConnect, after all. 7 is position after "http://"; the second arg
484  // to substr() is the sub string length.
485  if (at_sign != url.npos)
486  d_upstring = url.substr(7, at_sign - 7);
487 
488  if (!d_upstring.empty())
489  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
490 
491  // Pass save_raw_http_headers() a pointer to the vector<string> where the
492  // response headers may be stored. Callers can use the resp_hdrs
493  // value/result parameter to get the raw response header information .
494  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
495 
496  // This is the call that causes curl to go and get the remote resource and "write it down"
497  // utilizing the configuration state that has been previously conditioned by various perturbations
498  // of calls to curl_easy_setopt().
499  CURLcode res = curl_easy_perform(d_curl);
500 
501  // Free the header list and null the value in d_curl.
502  curl_slist_free_all(req_hdrs.get_headers());
503  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
504 
505  // Reset the proxy?
506  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
507  curl_easy_setopt(d_curl, CURLOPT_PROXY,
508  d_rcr->get_proxy_server_host().c_str());
509 
510  if (res != 0)
511  throw Error(d_error_buffer);
512 
513  long status;
514  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
515  if (res != 0)
516  throw Error(d_error_buffer);
517 
518  char *ct_ptr = 0;
519  res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
520  if (res == CURLE_OK && ct_ptr)
521  d_content_type = ct_ptr;
522  else
523  d_content_type = "";
524 
525  return status;
526 }
527 
531 bool
532 HTTPConnect::url_uses_proxy_for(const string &url)
533 {
534  if (d_rcr->is_proxy_for_used()) {
535  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
536  int index = 0, matchlen;
537  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
538  }
539 
540  return false;
541 }
542 
546 bool
547 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
548 {
549  return d_rcr->is_no_proxy_for_used()
550  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
551 }
552 
553 // Public methods. Mostly...
554 
561 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
562  d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
563 
564 {
565  d_accept_deflate = rcr->get_deflate();
566  d_rcr = rcr;
567 
568  // Load in the default headers to send with a request. The empty Pragma
569  // headers overrides libcurl's default Pragma: no-cache header (which
570  // will disable caching by Squid, et c.). The User-Agent header helps
571  // make server logs more readable. 05/05/03 jhrg
572  d_request_headers.push_back(string("Pragma:"));
573  string user_agent = string("User-Agent: ") + string(CNAME)
574  + string("/") + string(CVER);
575  d_request_headers.push_back(user_agent);
576  if (d_accept_deflate)
577  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
578 
579  // HTTPCache::instance returns a valid ptr or 0.
580  if (d_rcr->get_use_cache())
581  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
582  else
583  d_http_cache = 0;
584 
585  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
586  << ")" << endl);
587 
588  if (d_http_cache) {
589  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
590  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
591  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
592  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
593  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
594  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
595  }
596 
597  d_cookie_jar = rcr->get_cookie_jar();
598 
599  www_lib_init(); // This may throw either Error or InternalErr
600 }
601 
/** Destroy the object and release the libcurl easy handle that
    www_lib_init() created. */
HTTPConnect::~HTTPConnect()
{
    DBG2(cerr << "Entering the HTTPConnect dtor" << endl);

    // Release the handle returned by curl_easy_init().
    curl_easy_cleanup(d_curl);

    DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
}
610 
/** Predicate, for use with find_if(), that is true for strings that begin
    with a given header prefix (e.g. "Content-Type:"). The comparison is
    case sensitive.

    The prefix is stored by value so that the predicate stays valid when it
    is built from a temporary string. The class used to derive from
    std::unary_function; that base was deprecated in C++11 and removed in
    C++17 and is not needed by find_if(), so it has been dropped. */
class HeaderMatch {
    const std::string d_header;
public:
    HeaderMatch(const std::string &header) : d_header(header) {}

    /// True if arg starts with the prefix given to the constructor.
    bool operator()(const std::string &arg) const
    {
        // Compare only the leading characters instead of searching all of arg.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
618 
631 HTTPResponse *
632 HTTPConnect::fetch_url(const string &url)
633 {
634 #ifdef HTTP_TRACE
635  cout << "GET " << url << " HTTP/1.0" << endl;
636 #endif
637 
638  HTTPResponse *stream;
639 
640  if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
641  stream = caching_fetch_url(url);
642  }
643  else {
644  stream = plain_fetch_url(url);
645  }
646 
647 #ifdef HTTP_TRACE
648  stringstream ss;
649  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
650  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
651  ss << stream->get_headers()->at(i) << endl;
652  }
653  cout << ss.str();
654 #endif
655 
656  ParseHeader parser;
657 
658  // An apparent quirk of libcurl is that it does not pass the Content-type
659  // header to the callback used to save them, but check and add it from the
660  // saved state variable only if it's not there (without this a test failed
661  // in HTTPCacheTest). jhrg 11/12/13
662  if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
663  HeaderMatch("Content-Type:")) == stream->get_headers()->end())
664  stream->get_headers()->push_back("Content-Type: " + d_content_type);
665 
666  parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
667 
668 #ifdef HTTP_TRACE
669  cout << endl << endl;
670 #endif
671 
672  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
673  if (parser.get_location() != "" &&
674  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
675  delete stream;
676  return fetch_url(parser.get_location());
677  }
678 
679  stream->set_type(parser.get_object_type()); // uses the value of content-description
680 
681  stream->set_version(parser.get_server());
682  stream->set_protocol(parser.get_protocol());
683 
684  if (d_use_cpp_streams) {
685  stream->transform_to_cpp();
686  }
687 
688  return stream;
689 }
690 
// Look around for a reasonable place to put a temporary file. Check first
// the value of the TMPDIR env var. If that does not yield a path that's
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
// defined in stdio.h). If both come up empty, then use `./'.
695 
696 // Change this to a version that either returns a string or an open file
697 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
698 // (see open()) to make it more secure. Ideal solution: get deserialize()
699 // methods to read from a stream returned by libcurl, not from a temporary
700 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
701 static string
702 get_tempfile_template(const string &file_template)
703 {
704  string c;
705 
706  // Windows has one idea of the standard name(s) for a temporary files dir
707 #ifdef WIN32
708  // white list for a WIN32 directory
709  Regex directory("[-a-zA-Z0-9_:\\]*");
710 
711  // If we're OK to use getenv(), try it.
712 #ifdef USE_GETENV
713  c = getenv("TEMP");
714  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
715  goto valid_temp_directory;
716 
717  c= getenv("TMP");
718  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
719  goto valid_temp_directory;
720 #endif // USE_GETENV
721 
722  // The windows default
723  c = "c:\tmp";
724  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
725  goto valid_temp_directory;
726 
727 #else // Unix/Linux/OSX has another...
728  // white list for a directory
729  Regex directory("[-a-zA-Z0-9_/]*");
730 #ifdef USE_GETENV
731  c = getenv("TMPDIR");
732  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
733  goto valid_temp_directory;
734 #endif // USE_GETENV
735 
736  // Unix defines this sometimes - if present, use it.
737 #ifdef P_tmpdir
738  if (access(P_tmpdir, W_OK | R_OK) == 0) {
739  c = P_tmpdir;
740  goto valid_temp_directory;
741  }
742 #endif
743 
744  // The Unix default
745  c = "/tmp";
746  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
747  goto valid_temp_directory;
748 
749 #endif // WIN32
750 
751  // If we found nothing useful, use the current directory
752  c = ".";
753 
754 valid_temp_directory:
755 
756 #ifdef WIN32
757  c += "\\" + file_template;
758 #else
759  c += "/" + file_template;
760 #endif
761 
762  return c;
763 }
764 
783 string
784 get_temp_file(FILE *&stream) throw(Error)
785 {
786  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
787 
788  vector<char> pathname(dods_temp.length() + 1);
789 
790  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
791 
792  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
793 
794  // Open truncated for update. NB: mkstemp() returns a file descriptor.
795 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
796  stream = fopen(_mktemp(&pathname[0]), "w+b");
797 #else
798  // Make sure that temp files are accessible only by the owner.
799  int mask = umask(077);
800  if (mask < 0)
801  throw Error("Could not set the file creation mask: " + string(strerror(errno)));
802  int fd = mkstemp(&pathname[0]);
803  if (fd < 0)
804  throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
805 
806  stream = fdopen(fd, "w+");
807  umask(mask);
808 #endif
809 
810  if (!stream)
811  throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
812 
813  dods_temp = &pathname[0];
814  return dods_temp;
815 }
816 
817 
823 void
824 close_temp(FILE *s, const string &name)
825 {
826  int res = fclose(s);
827  if (res)
828  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
829 
830  res = unlink(name.c_str());
831  if (res != 0)
832  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
833 }
834 
856 HTTPResponse *
857 HTTPConnect::caching_fetch_url(const string &url)
858 {
859  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
860 
861  vector<string> *headers = new vector<string>;
862  string file_name;
863  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
864  if (!s) {
865  // url not in cache; get it and cache it
866  DBGN(cerr << "no; getting response and caching." << endl);
867  delete headers; headers = 0;
868  time_t now = time(0);
869  HTTPResponse *rs = plain_fetch_url(url);
870  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
871 
872  return rs;
873  }
874  else { // url in cache
875  DBGN(cerr << "yes... ");
876 
877  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
878  DBGN(cerr << "and it's valid; using cached response." << endl);
879  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
880  return crs;
881  }
882  else { // url in cache but not valid; validate
883  DBGN(cerr << "but it's not valid; validating... ");
884 
885  d_http_cache->release_cached_response(s); // This closes 's'
886  headers->clear();
887  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
888  FILE *body = 0;
889  string dods_temp = get_temp_file(body);
890  time_t now = time(0); // When was the request made (now).
891  long http_status;
892 
893  try {
894  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
895  rewind(body);
896  }
897  catch (Error &e) {
898  close_temp(body, dods_temp);
899  delete headers;
900  throw ;
901  }
902 
903  switch (http_status) {
904  case 200: { // New headers and new body
905  DBGN(cerr << "read a new response; caching." << endl);
906 
907  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
908  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
909 
910  return rs;
911  }
912 
913  case 304: { // Just new headers, use cached body
914  DBGN(cerr << "cached response valid; updating." << endl);
915 
916  close_temp(body, dods_temp);
917  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
918  string file_name;
919  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
920  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
921  return crs;
922  }
923 
924  default: { // Oops.
925  close_temp(body, dods_temp);
926  if (http_status >= 400) {
927  delete headers; headers = 0;
928  string msg = "Error while reading the URL: ";
929  msg += url;
930  msg
931  += ".\nThe OPeNDAP server returned the following message:\n";
932  msg += http_status_to_string(http_status);
933  throw Error(msg);
934  }
935  else {
936  delete headers; headers = 0;
937  throw InternalErr(__FILE__, __LINE__,
938  "Bad response from the HTTP server: " + long_to_string(http_status));
939  }
940  }
941  }
942  }
943  }
944 
945  throw InternalErr(__FILE__, __LINE__, "Should never get here");
946 }
947 
959 HTTPResponse *
960 HTTPConnect::plain_fetch_url(const string &url)
961 {
962  DBG(cerr << "Getting URL: " << url << endl);
963  FILE *stream = 0;
964  string dods_temp = get_temp_file(stream);
965  vector<string> *resp_hdrs = new vector<string>;
966 
967  int status = -1;
968  try {
969  status = read_url(url, stream, resp_hdrs); // Throws Error.
970  if (status >= 400) {
971  // delete resp_hdrs; resp_hdrs = 0;
972  string msg = "Error while reading the URL: ";
973  msg += url;
974  msg += ".\nThe OPeNDAP server returned the following message:\n";
975  msg += http_status_to_string(status);
976  throw Error(msg);
977  }
978  }
979 
980  catch (Error &e) {
981  delete resp_hdrs;
982  close_temp(stream, dods_temp);
983  throw;
984  }
985 
986 #if 0
987  if (d_use_cpp_streams) {
988  fclose(stream);
989  fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
990  return new HTTPResponse(in, status, resp_hdrs, dods_temp);
991  }
992  else {
993 #endif
994  rewind(stream);
995  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
996 #if 0
997 }
998 #endif
999 }
1000 
1012 void
1014 {
1015  d_accept_deflate = deflate;
1016 
1017  if (d_accept_deflate) {
1018  if (find(d_request_headers.begin(), d_request_headers.end(),
1019  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1020  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1021  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1022  ostream_iterator<string>(cerr, "\n")));
1023  }
1024  else {
1025  vector<string>::iterator i;
1026  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1027  bind2nd(equal_to<string>(),
1028  string("Accept-Encoding: deflate, gzip, compress")));
1029  d_request_headers.erase(i, d_request_headers.end());
1030  }
1031 }
1032 
1041 void
1042 HTTPConnect::set_xdap_protocol(int major, int minor)
1043 {
1044  // Look for, and remove if one exists, an XDAP-Accept header
1045  vector<string>::iterator i;
1046  i = find_if(d_request_headers.begin(), d_request_headers.end(),
1047  HeaderMatch("XDAP-Accept:"));
1048  if (i != d_request_headers.end())
1049  d_request_headers.erase(i);
1050 
1051  // Record and add the new header value
1052  d_dap_client_protocol_major = major;
1053  d_dap_client_protocol_minor = minor;
1054  ostringstream xdap_accept;
1055  xdap_accept << "XDAP-Accept: " << major << "." << minor;
1056 
1057  d_request_headers.push_back(xdap_accept.str());
1058 
1059  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1060  ostream_iterator<string>(cerr, "\n")));
1061 }
1062 
1078 void
1079 HTTPConnect::set_credentials(const string &u, const string &p)
1080 {
1081  if (u.empty())
1082  return;
1083 
1084  // Store the credentials locally.
1085  d_username = u;
1086  d_password = p;
1087 
1088  d_upstring = u + ":" + p;
1089 }
1090 
1091 } // namespace libdap
A class for error processing.
Definition: Error.h:93
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1156
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1571
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1249
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1388
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1319
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1480
void set_accept_deflate(bool deflate)
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:632
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition: InternalErr.h:65
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:912
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:784
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:824
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58