Skip to content

Commit 0975724

Browse files
HemangChothani authored and tswast committed
Add maximum_bytes_billed argument and context.default_query_job_config property to magics. (googleapis#8179)
BigQuery queries can get expensive, but the `maximum_bytes_billed` query option adds a cap to the price billed and rejects queries that could be too expensive. Provide a default value for `maximum_bytes_billed` in the `%%bigquery` magics by specifying a value for `google.cloud.bigquery.magics.context.default_query_job_config.maximum_bytes_billed`.
1 parent 217d93e commit 0975724

File tree

2 files changed

+203
-32
lines changed

2 files changed

+203
-32
lines changed

bigquery/google/cloud/bigquery/magics.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def __init__(self):
161161
self._project = None
162162
self._connection = None
163163
self._use_bqstorage_api = None
164+
self._default_query_job_config = bigquery.QueryJobConfig()
164165

165166
@property
166167
def credentials(self):
@@ -237,6 +238,28 @@ def use_bqstorage_api(self):
237238
def use_bqstorage_api(self, value):
238239
self._use_bqstorage_api = value
239240

241+
@property
242+
def default_query_job_config(self):
243+
"""google.cloud.bigquery.job.QueryJobConfig: Default job
244+
configuration for queries.
245+
246+
The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
247+
used for queries. Some properties can be overridden with arguments to
248+
the magics.
249+
250+
Example:
251+
Manually setting the default value for ``maximum_bytes_billed``
252+
to 100 MB:
253+
254+
>>> from google.cloud.bigquery import magics
255+
>>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
256+
"""
257+
return self._default_query_job_config
258+
259+
@default_query_job_config.setter
260+
def default_query_job_config(self, value):
261+
self._default_query_job_config = value
262+
240263

241264
context = Context()
242265

@@ -291,6 +314,14 @@ def _run_query(client, query, job_config=None):
291314
default=None,
292315
help=("Project to use for executing this query. Defaults to the context project."),
293316
)
317+
@magic_arguments.argument(
318+
"--maximum_bytes_billed",
319+
default=None,
320+
help=(
321+
"maximum_bytes_billed to use for executing this query. Defaults to "
322+
"the context default_query_job_config.maximum_bytes_billed."
323+
),
324+
)
294325
@magic_arguments.argument(
295326
"--use_legacy_sql",
296327
action="store_true",
@@ -363,7 +394,11 @@ def _cell_magic(line, query):
363394
)
364395

365396
project = args.project or context.project
366-
client = bigquery.Client(project=project, credentials=context.credentials)
397+
client = bigquery.Client(
398+
project=project,
399+
credentials=context.credentials,
400+
default_query_job_config=context.default_query_job_config,
401+
)
367402
if context._connection:
368403
client._connection = context._connection
369404
bqstorage_client = _make_bqstorage_client(
@@ -372,6 +407,12 @@ def _cell_magic(line, query):
372407
job_config = bigquery.job.QueryJobConfig()
373408
job_config.query_parameters = params
374409
job_config.use_legacy_sql = args.use_legacy_sql
410+
411+
if args.maximum_bytes_billed == "None":
412+
job_config.maximum_bytes_billed = 0
413+
elif args.maximum_bytes_billed is not None:
414+
value = int(args.maximum_bytes_billed)
415+
job_config.maximum_bytes_billed = value
375416
query_job = _run_query(client, query, job_config)
376417

377418
if not args.verbose:

bigquery/tests/unit/test_magics.py

Lines changed: 161 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import re
16-
import mock
17-
import six
1817
from concurrent import futures
1918

19+
import mock
2020
import pytest
21+
import six
2122

2223
try:
2324
import pandas
@@ -37,6 +38,7 @@
3738
from google.cloud import bigquery_storage_v1beta1
3839
except ImportError: # pragma: NO COVER
3940
bigquery_storage_v1beta1 = None
41+
from google.cloud.bigquery import job
4042
from google.cloud.bigquery import table
4143
from google.cloud.bigquery import magics
4244
from tests.unit.helpers import make_connection
@@ -63,6 +65,26 @@ def ipython_interactive(request, ipython):
6365
yield ipython
6466

6567

68+
JOB_REFERENCE_RESOURCE = {"projectId": "its-a-project-eh", "jobId": "some-random-id"}
69+
TABLE_REFERENCE_RESOURCE = {
70+
"projectId": "its-a-project-eh",
71+
"datasetId": "ds",
72+
"tableId": "persons",
73+
}
74+
QUERY_RESOURCE = {
75+
"jobReference": JOB_REFERENCE_RESOURCE,
76+
"configuration": {
77+
"query": {
78+
"destinationTable": TABLE_REFERENCE_RESOURCE,
79+
"query": "SELECT 42 FROM `life.the_universe.and_everything`;",
80+
"queryParameters": [],
81+
"useLegacySql": False,
82+
}
83+
},
84+
"status": {"state": "DONE"},
85+
}
86+
87+
6688
def test_context_credentials_auto_set_w_application_default_credentials():
6789
"""When Application Default Credentials are set, the context credentials
6890
will be created the first time it is called
@@ -117,22 +139,13 @@ def test_context_connection_can_be_overriden():
117139
default_patch = mock.patch(
118140
"google.auth.default", return_value=(credentials_mock, project)
119141
)
142+
job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE)
143+
job_reference["projectId"] = project
120144

121145
query = "select * from persons"
122-
job_reference = {"projectId": project, "jobId": "some-random-id"}
123-
table = {"projectId": project, "datasetId": "ds", "tableId": "persons"}
124-
resource = {
125-
"jobReference": job_reference,
126-
"configuration": {
127-
"query": {
128-
"destinationTable": table,
129-
"query": query,
130-
"queryParameters": [],
131-
"useLegacySql": False,
132-
}
133-
},
134-
"status": {"state": "DONE"},
135-
}
146+
resource = copy.deepcopy(QUERY_RESOURCE)
147+
resource["jobReference"] = job_reference
148+
resource["configuration"]["query"]["query"] = query
136149
data = {"jobReference": job_reference, "totalRows": 0, "rows": []}
137150

138151
conn = magics.context._connection = make_connection(resource, data)
@@ -170,22 +183,13 @@ def test_context_no_connection():
170183
default_patch = mock.patch(
171184
"google.auth.default", return_value=(credentials_mock, project)
172185
)
186+
job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE)
187+
job_reference["projectId"] = project
173188

174189
query = "select * from persons"
175-
job_reference = {"projectId": project, "jobId": "some-random-id"}
176-
table = {"projectId": project, "datasetId": "ds", "tableId": "persons"}
177-
resource = {
178-
"jobReference": job_reference,
179-
"configuration": {
180-
"query": {
181-
"destinationTable": table,
182-
"query": query,
183-
"queryParameters": [],
184-
"useLegacySql": False,
185-
}
186-
},
187-
"status": {"state": "DONE"},
188-
}
190+
resource = copy.deepcopy(QUERY_RESOURCE)
191+
resource["jobReference"] = job_reference
192+
resource["configuration"]["query"]["query"] = query
189193
data = {"jobReference": job_reference, "totalRows": 0, "rows": []}
190194

191195
conn_mock = make_connection(resource, data, data, data)
@@ -239,7 +243,8 @@ def test__run_query():
239243
assert updates[0] == expected_first_line
240244
execution_updates = updates[1:-1]
241245
assert len(execution_updates) == 3 # one update per API response
242-
assert all(re.match("Query executing: .*s", line) for line in execution_updates)
246+
for line in execution_updates:
247+
assert re.match("Query executing: .*s", line)
243248
assert re.match("Query complete after .*s", updates[-1])
244249

245250

@@ -548,6 +553,131 @@ def test_bigquery_magic_without_bqstorage(monkeypatch):
548553
assert isinstance(return_value, pandas.DataFrame)
549554

550555

556+
@pytest.mark.usefixtures("ipython_interactive")
557+
def test_bigquery_magic_w_maximum_bytes_billed_invalid():
558+
ip = IPython.get_ipython()
559+
ip.extension_manager.load_extension("google.cloud.bigquery")
560+
magics.context._project = None
561+
562+
sql = "SELECT 17 AS num"
563+
564+
with pytest.raises(ValueError):
565+
ip.run_cell_magic("bigquery", "--maximum_bytes_billed=abc", sql)
566+
567+
568+
@pytest.mark.parametrize(
569+
"param_value,expected", [("987654321", "987654321"), ("None", "0")]
570+
)
571+
@pytest.mark.usefixtures("ipython_interactive")
572+
def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, expected):
573+
ip = IPython.get_ipython()
574+
ip.extension_manager.load_extension("google.cloud.bigquery")
575+
magics.context._project = None
576+
577+
# Set the default maximum bytes billed, so we know it's overridable by the param.
578+
magics.context.default_query_job_config.maximum_bytes_billed = 1234567
579+
580+
project = "test-project"
581+
job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE)
582+
job_reference["projectId"] = project
583+
query = "SELECT 17 AS num"
584+
resource = copy.deepcopy(QUERY_RESOURCE)
585+
resource["jobReference"] = job_reference
586+
resource["configuration"]["query"]["query"] = query
587+
data = {"jobReference": job_reference, "totalRows": 0, "rows": []}
588+
credentials_mock = mock.create_autospec(
589+
google.auth.credentials.Credentials, instance=True
590+
)
591+
default_patch = mock.patch(
592+
"google.auth.default", return_value=(credentials_mock, "general-project")
593+
)
594+
conn = magics.context._connection = make_connection(resource, data)
595+
list_rows_patch = mock.patch(
596+
"google.cloud.bigquery.client.Client.list_rows",
597+
return_value=google.cloud.bigquery.table._EmptyRowIterator(),
598+
)
599+
with list_rows_patch, default_patch:
600+
ip.run_cell_magic(
601+
"bigquery", "--maximum_bytes_billed={}".format(param_value), query
602+
)
603+
604+
_, req = conn.api_request.call_args_list[0]
605+
sent_config = req["data"]["configuration"]["query"]
606+
assert sent_config["maximumBytesBilled"] == expected
607+
608+
609+
@pytest.mark.usefixtures("ipython_interactive")
610+
def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace():
611+
ip = IPython.get_ipython()
612+
ip.extension_manager.load_extension("google.cloud.bigquery")
613+
magics.context._project = None
614+
615+
magics.context.default_query_job_config.maximum_bytes_billed = 1337
616+
617+
project = "test-project"
618+
job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE)
619+
job_reference["projectId"] = project
620+
query = "SELECT 17 AS num"
621+
resource = copy.deepcopy(QUERY_RESOURCE)
622+
resource["jobReference"] = job_reference
623+
resource["configuration"]["query"]["query"] = query
624+
data = {"jobReference": job_reference, "totalRows": 0, "rows": []}
625+
credentials_mock = mock.create_autospec(
626+
google.auth.credentials.Credentials, instance=True
627+
)
628+
default_patch = mock.patch(
629+
"google.auth.default", return_value=(credentials_mock, "general-project")
630+
)
631+
conn = magics.context._connection = make_connection(resource, data)
632+
list_rows_patch = mock.patch(
633+
"google.cloud.bigquery.client.Client.list_rows",
634+
return_value=google.cloud.bigquery.table._EmptyRowIterator(),
635+
)
636+
with list_rows_patch, default_patch:
637+
ip.run_cell_magic("bigquery", "", query)
638+
639+
_, req = conn.api_request.call_args_list[0]
640+
sent_config = req["data"]["configuration"]["query"]
641+
assert sent_config["maximumBytesBilled"] == "1337"
642+
643+
644+
@pytest.mark.usefixtures("ipython_interactive")
645+
def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter():
646+
ip = IPython.get_ipython()
647+
ip.extension_manager.load_extension("google.cloud.bigquery")
648+
magics.context._project = None
649+
650+
magics.context.default_query_job_config = job.QueryJobConfig(
651+
maximum_bytes_billed=10203
652+
)
653+
654+
project = "test-project"
655+
job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE)
656+
job_reference["projectId"] = project
657+
query = "SELECT 17 AS num"
658+
resource = copy.deepcopy(QUERY_RESOURCE)
659+
resource["jobReference"] = job_reference
660+
resource["configuration"]["query"]["query"] = query
661+
data = {"jobReference": job_reference, "totalRows": 0, "rows": []}
662+
credentials_mock = mock.create_autospec(
663+
google.auth.credentials.Credentials, instance=True
664+
)
665+
default_patch = mock.patch(
666+
"google.auth.default", return_value=(credentials_mock, "general-project")
667+
)
668+
conn = magics.context._connection = make_connection(resource, data)
669+
list_rows_patch = mock.patch(
670+
"google.cloud.bigquery.client.Client.list_rows",
671+
return_value=google.cloud.bigquery.table._EmptyRowIterator(),
672+
)
673+
with list_rows_patch, default_patch:
674+
ip.run_cell_magic("bigquery", "", query)
675+
676+
_, req = conn.api_request.call_args_list[0]
677+
sent_config = req["data"]["configuration"]["query"]
678+
assert sent_config["maximumBytesBilled"] == "10203"
679+
680+
551681
@pytest.mark.usefixtures("ipython_interactive")
552682
def test_bigquery_magic_with_project():
553683
ip = IPython.get_ipython()

0 commit comments

Comments (0)