from datasets import load_dataset
# Base URL of the SQuAD-it repository on GitHub; fetching from it needs
# working network access to github.com.
url = "https://github.com/crux82/squad-it/raw/master/"

# Map each split name to the URL of its gzip-compressed JSON archive.
data_files = {
    split: f"{url}SQuAD_it-{split}.json.gz"
    for split in ("train", "test")
}

# Load both splits with the "json" loader, passing field="data" so the
# loader reads from the "data" key of each file.
squad_it_dataset = load_dataset(
    "json",
    data_files=data_files,
    field="data",
)
Traceback (most recent call last)
File ~/userfolder/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py:445, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
441 except BaseException as e:
442 # Remove the TypeError from the exception chain in
443 # Python 3 (including for exceptions like SystemExit).
444 # Otherwise it looks like a bug in the code.
--> 445 six.raise_from(e, None)
446 except (SocketTimeout, BaseSSLError, SocketError) as e:
File <string>:3, in raise_from(value, from_value)
File ~/userfolder/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py:440, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
439 try:
--> 440 httplib_response = conn.getresponse()
441 except BaseException as e:
442 # Remove the TypeError from the exception chain in
443 # Python 3 (including for exceptions like SystemExit).
444 # Otherwise it looks like a bug in the code.
File ~/userfolder/anaconda3/lib/python3.9/http/client.py:1371, in HTTPConnection.getresponse(self)
1370 try:
-> 1371 response.begin()
1372 except ConnectionError:
...
--> 529 raise ReadTimeout(e, request=request)
530 else:
531 raise
ReadTimeout: HTTPSConnectionPool(host='github.com', port=443): Read timed out. (read timeout=10.0)
原因应该是校园网或单位内部网连接 GitHub 不稳定,导致读取超时(Read timed out);换成私人网络或手机流量后,应该就可以正常下载了。