Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" Google BigQuery support """ 

2from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union 

3 

4from pandas.compat._optional import import_optional_dependency 

5 

6if TYPE_CHECKING: 

7 from pandas import DataFrame 

8 

9 

10def _try_import(): 

11 # since pandas is a dependency of pandas-gbq 

12 # we need to import on first use 

13 msg = ( 

14 "pandas-gbq is required to load data from Google BigQuery. " 

15 "See the docs: https://pandas-gbq.readthedocs.io." 

16 ) 

17 pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) 

18 return pandas_gbq 

19 

20 

def read_gbq(
    query: str,
    project_id: Optional[str] = None,
    index_col: Optional[str] = None,
    col_order: Optional[List[str]] = None,
    reauth: bool = False,
    auth_local_webserver: bool = False,
    dialect: Optional[str] = None,
    location: Optional[str] = None,
    configuration: Optional[Dict[str, Any]] = None,
    credentials=None,
    use_bqstorage_api: Optional[bool] = None,
    private_key=None,
    verbose=None,
    progress_bar_type: Optional[str] = None,
) -> "DataFrame":
    """
    Run a query against Google BigQuery and load the result.

    This function is a wrapper around the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__, which must be installed.

    For authentication instructions, consult the `How to authenticate with
    Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide.

    Parameters
    ----------
    query : str
        SQL-like query that selects the data to return.
    project_id : str, optional
        Google BigQuery account project ID. May be omitted when it is
        available from the environment.
    index_col : str, optional
        Name of the result column to use as the index of the returned
        DataFrame.
    col_order : list(str), optional
        BigQuery column names, listed in the order they should appear in
        the returned DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. Useful when
        several accounts are in use.
    auth_local_webserver : bool, default False
        When obtaining user credentials, use the `local webserver flow`_
        rather than the `console flow`_.

        .. _local webserver flow:
            http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
        .. _console flow:
            http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

        *New in version 0.2.0 of pandas-gbq*.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future version.

        SQL syntax dialect to use. One of:

        ``'legacy'``
            BigQuery's legacy SQL dialect. See the `BigQuery Legacy SQL
            Reference
            <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__
            for details.
        ``'standard'``
            BigQuery's standard SQL, which is compliant with the SQL 2011
            standard. See the `BigQuery Standard SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__
            for details.

        .. versionchanged:: 0.24.0
    location : str, optional
        Location in which the query job should run. The `BigQuery locations
        documentation
        <https://cloud.google.com/bigquery/docs/dataset-locations>`__ lists
        the available locations. It must match the location of every
        dataset used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        See the `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__
        for more information.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Pass this to override the
        default credentials, for instance to use Compute Engine
        :class:`google.auth.compute_engine.Credentials` or Service Account
        :class:`google.oauth2.service_account.Credentials` directly.

        *New in version 0.8.0 of pandas-gbq*.

        .. versionadded:: 0.24.0
    use_bqstorage_api : bool, default False
        Download query results through the `BigQuery Storage API
        <https://cloud.google.com/bigquery/docs/reference/storage/>`__, which
        is quick but comes at an increased cost. Before using this API,
        `enable it in the Cloud Console
        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
        The `bigquery.readsessions.create
        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
        permission is also required on the project the queries are billed
        to.

        Requires version 0.10.0 or later of the ``pandas-gbq`` package, as
        well as the ``google-cloud-bigquery-storage`` and ``fastavro``
        packages.

        If left as ``None`` (the default in this signature), the argument is
        not forwarded to ``pandas-gbq`` at all.

        .. versionadded:: 0.25.0
    private_key : optional
        Accepted in the signature but not used by this function; it is not
        forwarded to ``pandas_gbq.read_gbq``.
    verbose : optional
        Accepted in the signature but not used by this function; it is not
        forwarded to ``pandas_gbq.read_gbq``.
    progress_bar_type : str, optional
        If set, a progress bar is shown while the data downloads, using the
        `tqdm <https://tqdm.github.io/>`__ library. The ``tqdm`` package
        must be installed to use this feature.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

        Note that this feature requires version 0.12.0 or later of the
        ``pandas-gbq`` package, and it requires the ``tqdm`` package.
        Slightly different than ``pandas-gbq``, here the default is ``None``.

        .. versionadded:: 1.0.0

    Returns
    -------
    DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
    """
    gbq = _try_import()

    # Newer keyword arguments: only hand them to pandas-gbq when the caller
    # explicitly set them, so an unset option is never passed along.
    newer_options = (
        ("use_bqstorage_api", use_bqstorage_api),
        ("progress_bar_type", progress_bar_type),
    )
    explicit_options = {
        name: value for name, value in newer_options if value is not None
    }

    # Keywords that are always forwarded, in the same order as before.
    # Note that private_key and verbose are deliberately absent here.
    fixed_options = {
        "project_id": project_id,
        "index_col": index_col,
        "col_order": col_order,
        "reauth": reauth,
        "auth_local_webserver": auth_local_webserver,
        "dialect": dialect,
        "location": location,
        "configuration": configuration,
        "credentials": credentials,
    }

    return gbq.read_gbq(query, **fixed_options, **explicit_options)

188 

189 

def to_gbq(
    dataframe: "DataFrame",
    destination_table: str,
    project_id: Optional[str] = None,
    chunksize: Optional[int] = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = False,
    table_schema: Optional[List[Dict[str, str]]] = None,
    location: Optional[str] = None,
    progress_bar: bool = True,
    credentials=None,
    verbose=None,
    private_key=None,
) -> None:
    """
    Hand a DataFrame to ``pandas_gbq.to_gbq``.

    The ``pandas_gbq`` module is imported on first use through
    ``_try_import``. Every argument is then forwarded unchanged:
    ``dataframe`` and ``destination_table`` positionally, all the others
    by keyword.

    Parameters
    ----------
    dataframe : DataFrame
        The frame passed as the first positional argument.
    destination_table : str
        Passed as the second positional argument.
    project_id, chunksize, reauth, if_exists, auth_local_webserver, \
table_schema, location, progress_bar, credentials, verbose, private_key
        Forwarded as keyword arguments of the same name. Their meaning is
        defined by ``pandas_gbq.to_gbq``; see that function's documentation.

    Returns
    -------
    None
        The result of the underlying ``pandas_gbq.to_gbq`` call is discarded.
    """
    gbq = _try_import()

    # Collected in the same order the keywords were previously spelled out.
    forwarded = {
        "project_id": project_id,
        "chunksize": chunksize,
        "reauth": reauth,
        "if_exists": if_exists,
        "auth_local_webserver": auth_local_webserver,
        "table_schema": table_schema,
        "location": location,
        "progress_bar": progress_bar,
        "credentials": credentials,
        "verbose": verbose,
        "private_key": private_key,
    }
    gbq.to_gbq(dataframe, destination_table, **forwarded)

220 )