diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 06ad8827a5642..87157205591a2 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -15,9 +15,6 @@ def _check_google_client_version(): - if compat.PY3: - raise NotImplementedError("Google's libraries do not support Python 3 yet") - try: import pkg_resources @@ -26,8 +23,9 @@ def _check_google_client_version(): _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) < '1.2.0': - raise ImportError("pandas requires google-api-python-client >= 1.2.0 for Google " + required_version = '1.4.0' if compat.PY3 else '1.2.0' + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) < required_version: + raise ImportError("pandas requires google-api-python-client >= " + required_version + " for Google " "BigQuery support, current version " + _GOOGLE_API_CLIENT_VERSION) logger = logging.getLogger('pandas.io.gbq') @@ -133,7 +131,7 @@ def get_service(self, credentials): return bigquery_service - def run_query(self, query): + def run_query(self, query, silent): try: from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError @@ -182,7 +180,8 @@ def run_query(self, query): job_reference = query_reply['jobReference'] while(not query_reply.get('jobComplete', False)): - print('Job not yet complete...') + if not silent: + print('Job is not yet complete...') query_reply = job_collection.getQueryResults( projectId=job_reference['projectId'], jobId=job_reference['jobId']).execute() @@ -267,10 +266,10 @@ def _parse_data(schema, rows): fields = schema['fields'] col_types = [field['type'] for field in fields] - col_names = [field['name'].encode('ascii', 'ignore') for field in fields] + col_names = [field['name'] for field in fields] col_dtypes = [dtype_map.get(field['type'], object) for field in fields] page_array = np.zeros((len(rows),), - dtype=zip(col_names, col_dtypes)) + dtype=list(zip(col_names, col_dtypes))) for row_num, 
raw_row in enumerate(rows): entries = raw_row.get('f', []) @@ -294,7 +293,7 @@ def _parse_entry(field_value, field_type): return field_value -def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False): +def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, silent = False): """Load data from Google BigQuery. THIS IS AN EXPERIMENTAL LIBRARY @@ -319,6 +318,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. + silent : boolean (default False) + Do not print status messages during query execution if True Returns ------- @@ -332,7 +333,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals raise TypeError("Missing required parameter: project_id") connector = GbqConnector(project_id, reauth = reauth) - schema, pages = connector.run_query(query) + schema, pages = connector.run_query(query, silent = silent) dataframe_list = [] while len(pages) > 0: page = pages.pop() diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 5417842d3f863..c9883414da36f 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -9,6 +9,7 @@ import sys import platform from time import sleep +from io import StringIO import numpy as np @@ -39,47 +40,43 @@ def missing_bq(): return True def _test_imports(): - if not compat.PY3: + required_version = '1.4.0' if compat.PY3 else '1.2.0' - global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED + global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ + _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True - except ImportError: - _SETUPTOOLS_INSTALLED = False + try: + import pkg_resources + _SETUPTOOLS_INSTALLED = True + except ImportError: + 
_SETUPTOOLS_INSTALLED = False - if _SETUPTOOLS_INSTALLED: - try: - from apiclient.discovery import build - from apiclient.errors import HttpError + if _SETUPTOOLS_INSTALLED: + try: + from apiclient.discovery import build + from apiclient.errors import HttpError - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.client import AccessTokenRefreshError + from oauth2client.client import OAuth2WebServerFlow + from oauth2client.client import AccessTokenRefreshError - from oauth2client.file import Storage - from oauth2client.tools import run_flow - _GOOGLE_API_CLIENT_INSTALLED=True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version + from oauth2client.file import Storage + from oauth2client.tools import run_flow + _GOOGLE_API_CLIENT_INSTALLED=True + _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0': - _GOOGLE_API_CLIENT_VALID_VERSION = True + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= required_version: + _GOOGLE_API_CLIENT_VALID_VERSION = True - except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False + except ImportError: + _GOOGLE_API_CLIENT_INSTALLED = False - try: - import httplib2 - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False + try: + import httplib2 + _HTTPLIB2_INSTALLED = True + except ImportError: + _HTTPLIB2_INSTALLED = False - - if compat.PY3: - raise NotImplementedError("Google's libraries do not support Python 3 yet") - if not _SETUPTOOLS_INSTALLED: raise ImportError('Could not import pkg_resources (setuptools).') @@ -87,7 +84,7 @@ def _test_imports(): raise ImportError('Could not import Google API Client.') if not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError("pandas requires google-api-python-client >= 1.2.0 for Google " + raise ImportError("pandas requires google-api-python-client >= " + required_version + " for Google " "BigQuery support, 
current version " + _GOOGLE_API_CLIENT_VERSION)
 
     if not _HTTPLIB2_INSTALLED:
@@ -295,6 +292,21 @@ def test_download_dataset_larger_than_200k_rows(self):
         # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results
         df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] GROUP EACH BY id ORDER BY id ASC LIMIT 200005", project_id=PROJECT_ID)
         self.assertEqual(len(df.drop_duplicates()), 200005)
+
+    def test_silent_option_true(self):
+        # With silent=True, read_gbq is expected to write nothing to stdout.
+        # Swap in an in-memory buffer so anything printed during the query
+        # can be inspected afterwards.
+        original_stdout = sys.stdout
+        captured = StringIO()
+        sys.stdout = captured
+        try:
+            gbq.read_gbq("SELECT 3", project_id=PROJECT_ID, silent=True)
+        finally:
+            # Restore the real stdout even when the query raises; otherwise a
+            # failure here would leave stdout redirected for all later tests.
+            sys.stdout = original_stdout
+        tm.assert_equal(captured.getvalue(), "")
 
 class TestToGBQIntegration(tm.TestCase):
     # This class requires bq.py to be installed for setup/teardown.