diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a3da1b0d4..7f291dbd5 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 -# created: 2023-08-02T10:53:29.114535628Z + digest: sha256:4f9b3b106ad0beafc2c8a415e3f62c1a0cc23cabea115dbe841b848f581cfe99 +# created: 2023-10-18T20:26:37.410353675Z diff --git a/.gitignore b/.gitignore index 99c3a1444..168b201f6 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 029bd342d..16170d0ca 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.3 \ - --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ - --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ - --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ - --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ - --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ - --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ - --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ - --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ - --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ - --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ - --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ - --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ - --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ - --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ - --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ - --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ - --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ - --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ - --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ - --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ - --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ - --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ - --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de +cryptography==41.0.4 \ + --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ + --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ + --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ + --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ + --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ + 
--hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ + --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ + --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ + --hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ + --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ + --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ + --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ + --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ + --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ + --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ + --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ + --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ + --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ + --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ + --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ + --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ + --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ + --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f # via # gcp-releasetool # secretstorage @@ -382,6 +382,7 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core + # googleapis-common-protos pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba @@ -466,9 +467,9 @@ typing-extensions==4.4.0 \ --hash=sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa \ --hash=sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e # via -r requirements.in -urllib3==1.26.12 \ - --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \ - --hash=sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997 +urllib3==1.26.18 \ + --hash=sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 \ + --hash=sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0 # via # requests # twine diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19409cbd3..6a8e16950 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 23.7.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 diff --git a/CHANGELOG.md b/CHANGELOG.md index a93bde9eb..41206fd78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.13.0](https://github.com/googleapis/python-bigquery/compare/v3.12.0...v3.13.0) (2023-10-30) + + +### Features + +* Add `Model.transform_columns` property ([#1661](https://github.com/googleapis/python-bigquery/issues/1661)) 
([5ceed05](https://github.com/googleapis/python-bigquery/commit/5ceed056482f6d1f2fc45e7e6b84382de45c85ed))
+* Add support for dataset.default_rounding_mode ([#1688](https://github.com/googleapis/python-bigquery/issues/1688)) ([83bc768](https://github.com/googleapis/python-bigquery/commit/83bc768b90a852d258a4805603020a296e02d2f9))
+
+
+### Bug Fixes
+
+* AccessEntry API representation parsing ([#1682](https://github.com/googleapis/python-bigquery/issues/1682)) ([a40d7ae](https://github.com/googleapis/python-bigquery/commit/a40d7ae03149708fc34c962b43a6ac198780b6aa))
+
+
+### Documentation
+
+* Remove redundant `bigquery_update_table_expiration` code sample ([#1673](https://github.com/googleapis/python-bigquery/issues/1673)) ([2dded33](https://github.com/googleapis/python-bigquery/commit/2dded33626b3de6c4ab5e1229eb4c85786b2ff53))
+* Revised `create_partitioned_table` sample ([#1447](https://github.com/googleapis/python-bigquery/issues/1447)) ([40ba859](https://github.com/googleapis/python-bigquery/commit/40ba859059c3e463e17ea7781bc5a9aff8244c5d))
+* Revised relax column mode sample ([#1467](https://github.com/googleapis/python-bigquery/issues/1467)) ([b8c9276](https://github.com/googleapis/python-bigquery/commit/b8c9276be011d971b941b583fd3d4417d438067f))
+
 ## [3.12.0](https://github.com/googleapis/python-bigquery/compare/v3.11.4...v3.12.0) (2023-10-02)
diff --git a/benchmark/README.md b/benchmark/README.md
index 435926acb..33065807e 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -1,8 +1,128 @@
 # BigQuery Benchmark
-This directory contains benchmarks for BigQuery client.
+This directory contains benchmark scripts for the BigQuery client. It is created primarily for project
+maintainers to measure library performance.
 
 ## Usage
-`python benchmark.py queries.json`
+`python benchmark.py`
 
-BigQuery service caches requests so the benchmark should be run
-at least twice, disregarding the first result.
+
+
+### Flags
+Run `python benchmark.py -h` for detailed information on available flags.
+
+`--reruns` can be used to override the default number of times a query is rerun. Must be a positive
+integer. Default value is 3.
+
+`--projectid` can be used to run benchmarks in a different project. If unset, the GOOGLE_CLOUD_PROJECT
+ environment variable is used.
+
+`--queryfile` can be used to override the default file which contains queries to be instrumented.
+
+`--table` can be used to specify a table to which benchmarking results should be streamed. The format
+for this string is in BigQuery standard SQL notation without escapes, e.g. `projectid.datasetid.tableid`
+
+`--create_table` can be used to have the benchmarking tool create the destination table prior to streaming.
+
+`--tag` allows arbitrary key:value pairs to be set. This flag can be specified multiple times.
+
+When the `--create_table` flag is set, the name of the new table must also be specified using `--table`.
+
+### Example invocations
+
+Setting all the flags
+```
+python benchmark.py \
+  --reruns 5 \
+  --projectid test_project_id \
+  --table logging_project_id.querybenchmarks.measurements \
+  --create_table \
+  --tag source:myhostname \
+  --tag somekeywithnovalue \
+  --tag experiment:special_environment_thing
+```
+
+Or, a more realistic invocation using shell substitutions:
+```
+python benchmark.py \
+  --reruns 5 \
+  --table $BENCHMARK_TABLE \
+  --tag origin:$(hostname) \
+  --tag branch:$(git branch --show-current) \
+  --tag latestcommit:$(git log --pretty=format:'%H' -n 1)
+```
+
+## Stream Results To A BigQuery Table
+
+When streaming benchmarking results to a BigQuery table, the table schema is as follows:
+```
+[
+  {
+    "name": "groupname",
+    "type": "STRING"
+  },
+  {
+    "name": "name",
+    "type": "STRING"
+  },
+  {
+    "name": "tags",
+    "type": "RECORD",
+    "mode": "REPEATED",
+    "fields": [
+      {
+        "name": "key",
+        "type": "STRING"
+      },
+      {
+        "name": "value",
+        "type": "STRING"
+      }
+    ]
+  },
+  {
+    "name": "SQL",
+    "type": "STRING"
+  },
+  {
+    "name": "runs",
+    "type": "RECORD",
+    "mode": "REPEATED",
+    "fields": [
+      {
+        "name": "errorstring",
+        "type": "STRING"
+      },
+      {
+        "name": "start_time",
+        "type": "TIMESTAMP"
+      },
+      {
+        "name": "query_end_time",
+        "type": "TIMESTAMP"
+      },
+      {
+        "name": "first_row_returned_time",
+        "type": "TIMESTAMP"
+      },
+      {
+        "name": "all_rows_returned_time",
+        "type": "TIMESTAMP"
+      },
+      {
+        "name": "total_rows",
+        "type": "INTEGER"
+      }
+    ]
+  },
+  {
+    "name": "event_time",
+    "type": "TIMESTAMP"
+  }
+]
+```
+
+The table schema is the same as the [benchmark in go](https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks),
+so results from both languages can be streamed to the same table.
+
+## BigQuery Benchmarks In Other Languages
+* Go: https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks
+* Java: https://github.com/googleapis/java-bigquery/tree/main/benchmark
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 2917f169a..30e294baa 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Google LLC
+# Copyright 2023 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,35 +12,312 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from google.cloud import bigquery
+"""Scripts for benchmarking BigQuery query performance."""
+
+import argparse
 from datetime import datetime
 import json
-import sys
-
-if len(sys.argv) < 2:
-    raise Exception('need query file, usage: python {0} '.format(sys.argv[0]))
-
-with open(sys.argv[1], 'r') as f:
-    queries = json.loads(f.read())
-
-client = bigquery.Client()
-
-for query in queries:
-    start_time = datetime.now()
-    job = client.query(query)
-    rows = job.result()
-
-    num_rows = 0
-    num_cols = None
-    first_byte_time = None
-
-    for row in rows:
-        if num_rows == 0:
-            num_cols = len(row)
-            first_byte_time = datetime.now() - start_time
-        elif num_cols != len(row):
-            raise Exception('found {0} columsn, expected {1}'.format(len(row), num_cols))
-        num_rows += 1
-    total_time = datetime.now() - start_time
-    print("query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec"
-          .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds()))
+import os
+
+from google.api_core import exceptions
+
+from google.cloud import bigquery
+
+_run_schema = [
+    bigquery.SchemaField("groupname", "STRING", mode="NULLABLE"),
+    bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
+    bigquery.SchemaField(
+        "tags",
+        "RECORD",
+        mode="REPEATED",
+        fields=[
+            bigquery.SchemaField("key", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("value", "STRING", mode="NULLABLE"),
+        ],
+    ),
+    bigquery.SchemaField("SQL", "STRING", mode="NULLABLE"),
+    bigquery.SchemaField(
+        "runs",
+        "RECORD",
+        mode="REPEATED",
+        fields=[
+            bigquery.SchemaField("errorstring", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("start_time", "TIMESTAMP", mode="NULLABLE"),
+            bigquery.SchemaField("query_end_time", "TIMESTAMP", mode="NULLABLE"),
+            bigquery.SchemaField(
+                "first_row_returned_time", "TIMESTAMP", mode="NULLABLE"
+            ),
+            bigquery.SchemaField(
+                "all_rows_returned_time", "TIMESTAMP", mode="NULLABLE"
+            ),
+            bigquery.SchemaField("total_rows", "INTEGER", mode="NULLABLE"),
+        ],
+    ),
+    bigquery.SchemaField("event_time", "TIMESTAMP", mode="NULLABLE"),
+]
+
+
+def _check_pos_int(value):
+    """Verifies the value is a positive integer."""
+    ivalue = int(value)
+    if ivalue <= 0:
+        raise argparse.ArgumentTypeError(
+            f"Argument reruns must be a positive integer. Actual value: {value}"
+        )
+    return ivalue
+
+
+def _parse_tag(tag):
+    """Parses input tag into key value pair as a dict."""
+    tagstring = str(tag)
+    key, value = tagstring.split(":")
+    if not key or not value:
+        raise argparse.ArgumentTypeError(
+            "key and value in tag need to be non-empty. Actual value: "
+            + f"key={key}, value={value}"
+        )
+    return {"key": key, "value": value}
+
+
+def _parse_args() -> dict:
+    """Parses input flags."""
+    parser = argparse.ArgumentParser(description="Benchmark for BigQuery.")
+
+    parser.add_argument(
+        "--reruns",
+        action="store",
+        type=_check_pos_int,
+        default=3,
+        metavar="",
+        help="how many times each query is run. Must be a positive integer."
+        + " Default 3 times",
+    )
+
+    parser.add_argument(
+        "--projectid",
+        action="store",
+        type=str,
+        metavar="",
+        help="run benchmarks in a different project. If unset, the "
+        + "GOOGLE_CLOUD_PROJECT environment variable is used",
+    )
+
+    parser.add_argument(
+        "--queryfile",
+        action="store",
+        type=str,
+        metavar="",
+        default="queries.json",
+        help="override the default file which contains queries to be instrumented",
+    )
+
+    parser.add_argument(
+        "--table",
+        action="store",
+        type=str,
+        metavar="",
+        help="specify a table to which benchmarking results should be "
+        + "streamed. The format for this string is in BigQuery standard SQL "
+        + "notation without escapes, e.g. projectid.datasetid.tableid",
+    )
+
+    parser.add_argument(
+        "--create_table",
+        action="store_true",
+        help="let the benchmarking tool create the destination table prior to"
+        + " streaming; if set, also need to set --table to specify table name",
+    )
+
+    parser.add_argument(
+        "--tag",
+        action="append",
+        type=_parse_tag,
+        metavar="",
+        help="set arbitrary key:value pairs, can be set multiple times",
+    )
+
+    args = parser.parse_args()
+    args_dict = vars(args)
+
+    # Verifies that project id is set.
+    if not args_dict.get("projectid"):
+        if projectid_env := os.environ.get("GOOGLE_CLOUD_PROJECT"):
+            args_dict["projectid"] = projectid_env
+        else:
+            raise ValueError(
+                "Must provide --projectid or set "
+                "GOOGLE_CLOUD_PROJECT environment variable"
+            )
+
+    # Verifies that table name is specified when `create_table == True`.
+    if args_dict.get("create_table") and not args_dict.get("table"):
+        raise ValueError(
+            "When --create_table is present, must specify table name with --table"
+        )
+
+    return args_dict
+
+
+def _prepare_table(client, create_table: bool, table_name: str) -> str:
+    """Ensures a table exists, and optionally creates it if directed."""
+
+    # Verifies that table destination is of valid format.
+    parts = table_name.split(".")
+    if len(parts) != 3:
+        raise ValueError(f"Expected table in p.d.t format, got: {table_name}")
+
+    table = bigquery.Table(table_name, schema=_run_schema)
+
+    # Create table if create_table == True.
+    if create_table:
+        table = client.create_table(table)
+        print(f"Created table {table.project}.{table.dataset_id}." f"{table.table_id}")
+
+    # Verifies that table exists.
+    client.get_table(table_name)
+    return table_name
+
+
+def _run_query(client, query: str, rerun: int) -> list:
+    """Runs an individual query `rerun` times, and returns the run results."""
+    runs = []
+
+    for _ in range(rerun):
+        print(".", end="", flush=True)
+        run = {}
+        num_rows = 0
+        num_cols = 0
+        start_time = datetime.now()
+        first_row_time = datetime.min
+        end_time = datetime.min
+
+        job = client.query(query)
+        query_end_time = datetime.now()
+
+        try:
+            rows = job.result()
+            for row in rows:
+                if num_rows == 0:
+                    num_cols = len(row)
+                    first_row_time = datetime.now()
+                elif num_cols != len(row):
+                    raise RuntimeError(f"found {len(row)} columns, expected {num_cols}")
+                num_rows += 1
+            end_time = datetime.now()
+        except exceptions.BadRequest as exc:
+            run["errorstring"] = repr(exc)
+
+        run["start_time"] = start_time.isoformat()
+        run["query_end_time"] = query_end_time.isoformat()
+        run["first_row_returned_time"] = first_row_time.isoformat()
+        run["all_rows_returned_time"] = end_time.isoformat()
+        run["total_rows"] = num_rows
+        runs.append(run)
+
+    print("")
+    return runs
+
+
+def _get_delta(time_str_1: str, time_str_2: str) -> str:
+    """Calculates the delta of two ISO format time strings, and returns it as a string."""
+    time_1 = datetime.fromisoformat(time_str_1)
+    time_2 = datetime.fromisoformat(time_str_2)
+    delta = time_1 - time_2
+    return str(delta)
+
+
+def _is_datetime_min(time_str: str) -> bool:
+    return datetime.fromisoformat(time_str) == datetime.min
+
+
+def _summary(run: dict) -> str:
+    """Converts a run dict to a run summary string."""
+    no_val = "NODATA"
+    output = ["QUERYTIME "]
+
+    if not _is_datetime_min(run.get("query_end_time")):
+        output.append(f"{_get_delta(run.get('query_end_time'), run.get('start_time'))}")
+    else:
+        output.append(no_val)
+    output.append(" FIRSTROW ")
+
+    if not _is_datetime_min(run.get("first_row_returned_time")):
+        output.append(
+            f"{_get_delta(run.get('first_row_returned_time'), run.get('start_time'))}"
+        )
+    else:
+        output.append(no_val)
+    output.append(" ALLROWS ")
+
+    if not _is_datetime_min(run.get("all_rows_returned_time")):
+        output.append(
+            f"{_get_delta(run.get('all_rows_returned_time'), run.get('start_time'))}"
+        )
+    else:
+        output.append(no_val)
+
+    if run.get("total_rows"):
+        output.append(f" ROWS {run.get('total_rows')}")
+    if run.get("errorstring"):
+        output.append(f" ERRORED {run.get('errorstring')}")
+
+    return "".join(output)
+
+
+def _print_results(profiles: list):
+    for i, prof in enumerate(profiles):
+        print(f"{i+1}: ({prof['groupname']}:{prof['name']})")
+        print(f"SQL: {prof['SQL']}")
+        print("MEASUREMENTS")
+        for j, run in enumerate(prof["runs"]):
+            print(f"\t\t({j}) {_summary(run)}")
+
+
+def _run_benchmarks(args: dict) -> list:
+    client = bigquery.Client()
+
+    # If we're going to stream results, let's make sure we can do that
+    # before running all the tests.
+ table_id = "" + if args.get("create_table") or args.get("table"): + table_id = _prepare_table(client, args.get("create_table"), args.get("table")) + + queries_file = args.get("queryfile") + with open(queries_file, "r") as f: + groups = json.loads(f.read()) + + measure_start = datetime.now() + profiles = [] + for group_name, group in groups.items(): + for name, query in group.items(): + print(f"Measuring {group_name} : {name}", end="", flush=True) + event_time = datetime.now() + runs = _run_query(client, query, args.get("reruns")) + + profile = {} + profile["groupname"] = group_name + profile["name"] = name + profile["tags"] = args.get("tag") or [] + profile["SQL"] = query + profile["runs"] = runs + profile["event_time"] = event_time.isoformat() + profiles.append(profile) + + measure_end = datetime.now() + print(f"Measurement time: {str(measure_end-measure_start)}") + + # Stream benchmarking results to table, if required. + if table_id: + print(f"Streaming test results to table {table_id}...") + errors = client.insert_rows_json(table_id, profiles) + if errors: + raise RuntimeError(f"Cannot upload queries profiles: {errors}") + print("Streaming complete.") + + return profiles + + +if __name__ == "__main__": + args = _parse_args() + profiles = _run_benchmarks(args) + _print_results(profiles) diff --git a/benchmark/queries.json b/benchmark/queries.json index 13fed38b5..464395619 100644 --- a/benchmark/queries.json +++ b/benchmark/queries.json @@ -1,10 +1,16 @@ -[ - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", - "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000000", - "SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000", - "SELECT title, id, timestamp, contributor_ip FROM `bigquery-public-data.samples.wikipedia` WHERE title like 'Blo%' ORDER BY id", - "SELECT * FROM `bigquery-public-data.baseball.games_post_wide` ORDER BY gameId", - "SELECT * FROM `bigquery-public-data.samples.github_nested` WHERE repository.has_downloads ORDER BY repository.created_at LIMIT 10000", - "SELECT repo_name, path FROM `bigquery-public-data.github_repos.files` WHERE path LIKE '%.java' ORDER BY id LIMIT 1000000" -] +{ + "simple-cacheable": { + "nycyellow-limit1k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000", + "nycyellow-limit10k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000", + "nycyellow-limit100k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000", + "wikisamples-ordered-limit1k":"SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000" + }, + "simple-nondeterministic": { + "current-timestamp":"SELECT CURRENT_TIMESTAMP() as ts", + "session-user": "SELECT SESSION_USER() as ts", + "literals": "SELECT 1 as i, 3.14 as pi" + }, + "simple-invalid": { + "invalid-query": "invalid sql here" + } +} diff --git a/docs/snippets.py b/docs/snippets.py index 3a46cd36c..72ac2a000 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -118,52 +118,6 @@ def test_create_client_default_credentials(): assert client is not None -def test_create_partitioned_table(client, to_delete): - dataset_id = "create_table_partitioned_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = client.create_dataset(dataset_ref) - to_delete.append(dataset) - - # [START bigquery_create_table_partitioned] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, 
'my_dataset') - - table_ref = dataset_ref.table("my_partitioned_table") - schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - bigquery.SchemaField("date", "DATE"), - ] - table = bigquery.Table(table_ref, schema=schema) - table.time_partitioning = bigquery.TimePartitioning( - type_=bigquery.TimePartitioningType.DAY, - field="date", # name of column to use for partitioning - expiration_ms=7776000000, - ) # 90 days - - table = client.create_table(table) - - print( - "Created table {}, partitioned on column {}".format( - table.table_id, table.time_partitioning.field - ) - ) - # [END bigquery_create_table_partitioned] - - assert table.time_partitioning.type_ == "DAY" - assert table.time_partitioning.field == "date" - assert table.time_partitioning.expiration_ms == 7776000000 - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) @pytest.mark.skip( reason=( "update_table() is flaky " @@ -201,98 +155,6 @@ def test_update_table_description(client, to_delete): # [END bigquery_update_table_description] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_update_table_expiration(client, to_delete): - """Update a table's expiration time.""" - dataset_id = "update_table_expiration_dataset_{}".format(_millis()) - table_id = "update_table_expiration_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # TODO(thejaredchapman): After code sample has been updated from cloud.google.com delete this. 
- - # [START bigquery_update_table_expiration] - import datetime - - # from google.cloud import bigquery - # client = bigquery.Client() - # project = client.project - # dataset_ref = bigquery.DatasetReference(project, dataset_id) - # table_ref = dataset_ref.table('my_table') - # table = client.get_table(table_ref) # API request - - assert table.expires is None - - # set table to expire 5 days from now - expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( - days=5 - ) - table.expires = expiration - table = client.update_table(table, ["expires"]) # API request - - # expiration is stored in milliseconds - margin = datetime.timedelta(microseconds=1000) - assert expiration - margin <= table.expires <= expiration + margin - # [END bigquery_update_table_expiration] - - -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_relax_column(client, to_delete): - """Updates a schema field from required to nullable.""" - dataset_id = "relax_column_dataset_{}".format(_millis()) - table_id = "relax_column_table_{}".format(_millis()) - project = client.project - dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = bigquery.Dataset(dataset_ref) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - # [START bigquery_relax_column] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - original_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - - dataset_ref = bigquery.DatasetReference(project, dataset_id) - table_ref = dataset_ref.table(table_id) - table = bigquery.Table(table_ref, schema=original_schema) - table = client.create_table(table) - assert all(field.mode == "REQUIRED" for field in table.schema) - - # SchemaField properties cannot be edited after initialization. - # To make changes, construct new SchemaField objects. 
- relaxed_schema = [ - bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"), - bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"), - ] - table.schema = relaxed_schema - table = client.update_table(table, ["schema"]) - - assert all(field.mode == "NULLABLE" for field in table.schema) - # [END bigquery_relax_column] - - @pytest.mark.skip( reason=( "update_table() is flaky " diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 014a721a8..684cbfc12 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -20,7 +20,7 @@ import math import re import os -from typing import Any, Optional, Union +from typing import Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -30,13 +30,6 @@ from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import packaging.version - -from google.cloud.bigquery.exceptions import ( - LegacyBigQueryStorageError, - LegacyPyarrowError, -) - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" @@ -55,12 +48,6 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") - -_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") - -_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") - BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" """Environment variable defining host for emulator.""" @@ -72,129 +59,6 @@ def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) -class BQStorageVersions: - """Version comparisons for google-cloud-bigqueyr-storage package.""" - - def __init__(self): - self._installed_version = None - - @property - def installed_version(self) -> packaging.version.Version: - """Return the parsed version of google-cloud-bigquery-storage.""" - if self._installed_version is None: - from google.cloud import bigquery_storage - - self._installed_version = packaging.version.parse( - # Use 0.0.0, since it is earlier than any released version. - # Legacy versions also have the same property, but - # creating a LegacyVersion has been deprecated. - # https://github.com/pypa/packaging/issues/321 - getattr(bigquery_storage, "__version__", "0.0.0") - ) - - return self._installed_version # type: ignore - - @property - def is_read_session_optional(self) -> bool: - """True if read_session is optional to rows(). - - See: https://github.com/googleapis/python-bigquery-storage/pull/228 - """ - return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. 
- """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." - ) - raise LegacyBigQueryStorageError(msg) - - -class PyarrowVersions: - """Version comparisons for pyarrow package.""" - - def __init__(self): - self._installed_version = None - - @property - def installed_version(self) -> packaging.version.Version: - """Return the parsed version of pyarrow.""" - if self._installed_version is None: - import pyarrow # type: ignore - - self._installed_version = packaging.version.parse( - # Use 0.0.0, since it is earlier than any released version. - # Legacy versions also have the same property, but - # creating a LegacyVersion has been deprecated. - # https://github.com/pypa/packaging/issues/321 - getattr(pyarrow, "__version__", "0.0.0") - ) - - return self._installed_version - - @property - def use_compliant_nested_type(self) -> bool: - return self.installed_version.major >= 4 - - def try_import(self, raise_if_error: bool = False) -> Any: - """Verify that a recent enough version of pyarrow extra is - installed. - - The function assumes that pyarrow extra is installed, and should thus - be used in places where this assumption holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Returns: - The ``pyarrow`` module or ``None``. - - Raises: - LegacyPyarrowError: - If the pyarrow package is outdated and ``raise_if_error`` is ``True``. - """ - try: - import pyarrow - except ImportError as exc: # pragma: NO COVER - if raise_if_error: - raise LegacyPyarrowError( - f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." - ) from exc - return None - - if self.installed_version < _MIN_PYARROW_VERSION: - if raise_if_error: - msg = ( - "Dependency pyarrow is outdated, please upgrade " - f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." 
- ) - raise LegacyPyarrowError(msg) - return None - - return pyarrow - - -BQ_STORAGE_VERSIONS = BQStorageVersions() -PYARROW_VERSIONS = PyarrowVersions() - - def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" return value is not None or (field is not None and field.mode != "NULLABLE") diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a14dbec9b..53db9511c 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -23,9 +23,8 @@ import warnings from typing import Any, Union -from packaging import version - -from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pyarrow_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema try: @@ -49,7 +48,11 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() + +_BIGNUMERIC_SUPPORT = False +if pyarrow is not None: + _BIGNUMERIC_SUPPORT = True try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` @@ -119,87 +122,6 @@ def __init__(self): self.done = False -def pyarrow_datetime(): - return pyarrow.timestamp("us", tz=None) - - -def pyarrow_numeric(): - return pyarrow.decimal128(38, 9) - - -def pyarrow_bignumeric(): - # 77th digit is partial. - # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types - return pyarrow.decimal256(76, 38) - - -def pyarrow_time(): - return pyarrow.time64("us") - - -def pyarrow_timestamp(): - return pyarrow.timestamp("us", tz="UTC") - - -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. - BQ_TO_ARROW_SCALARS = { - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. 
- pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER - -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER - - BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { "GEOGRAPHY": { b"ARROW:extension:name": b"google:sqlType:geography", @@ -240,7 +162,7 @@ def bq_to_arrow_data_type(field): if field_type_upper in schema._STRUCT_TYPES: return bq_to_arrow_struct_data_type(field) - data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper) + data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper) if data_type_constructor is None: return None return data_type_constructor() @@ -568,7 +490,9 @@ def augment_schema(dataframe, current_bq_schema): if pyarrow.types.is_list(arrow_table.type): # `pyarrow.ListType` detected_mode = "REPEATED" - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id) + detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq( + arrow_table.values.type.id + ) # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds # it to such datetimes, causing them to be recognized as TIMESTAMP type. @@ -584,7 +508,7 @@ def augment_schema(dataframe, current_bq_schema): detected_type = "DATETIME" else: detected_mode = field.mode - detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) + detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) if detected_type is None: unknown_type_fields.append(field) @@ -705,13 +629,13 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. """ - pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) import pyarrow.parquet # type: ignore kwargs = ( {"use_compliant_nested_type": parquet_use_compliant_nested_type} - if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type else {} ) @@ -820,7 +744,7 @@ def _download_table_bqstorage_stream( # Avoid deprecation warnings for passing in unnecessary read session. # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: rowstream = reader.rows() else: rowstream = reader.rows(session) diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py new file mode 100644 index 000000000..7266e5e02 --- /dev/null +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -0,0 +1,123 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for connecting BigQuery and pyarrow.""" + +from typing import Any + +from packaging import version + +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None + + +def pyarrow_datetime(): + return pyarrow.timestamp("us", tz=None) + + +def pyarrow_numeric(): + return pyarrow.decimal128(38, 9) + + +def pyarrow_bignumeric(): + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + return pyarrow.decimal256(76, 38) + + +def pyarrow_time(): + return pyarrow.time64("us") + + +def pyarrow_timestamp(): + return pyarrow.timestamp("us", tz="UTC") + + +_BQ_TO_ARROW_SCALARS = {} +_ARROW_SCALAR_IDS_TO_BQ = {} + +if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. + # Note(todo!!): type "BIGNUMERIC"'s matching pyarrow type is added in _pandas_helpers.py + _BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } + + _ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + # Adds bignumeric support only if pyarrow version >= 3.0.0 + # Decimal256 support was added to arrow 3.0.0 + # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/ + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. 
+ _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + + +def bq_to_arrow_scalars(bq_scalar: str): + """ + Returns: + The Arrow scalar type that the input BigQuery scalar type maps to. + If it cannot find the BigQuery scalar, return None. + """ + return _BQ_TO_ARROW_SCALARS.get(bq_scalar) + + +def arrow_scalar_ids_to_bq(arrow_scalar: Any): + """ + Returns: + The BigQuery scalar type that the input arrow scalar type maps to. + If it cannot find the arrow scalar, return None. + """ + return _ARROW_SCALAR_IDS_TO_BQ.get(arrow_scalar) diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py new file mode 100644 index 000000000..ce529b76e --- /dev/null +++ b/google/cloud/bigquery/_versions_helpers.py @@ -0,0 +1,173 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for verifying versions of installed modules.""" + +from typing import Any + +import packaging.version + +from google.cloud.bigquery import exceptions + + +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") + + +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow # type: ignore + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + @property + def use_compliant_nested_type(self) -> bool: + return self.installed_version.major >= 4 + + def try_import(self, raise_if_error: bool = False) -> Any: + """Verifies that a recent enough version of pyarrow extra is installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite + the constraints in `setup.py`, the calling code can use this helper + to verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + exceptions.LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is + ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise exceptions.LegacyPyarrowError( + "pyarrow package not found. Install pyarrow version >=" + f" {_MIN_PYARROW_VERSION}." 
+ ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade" + f" it to version >= {_MIN_PYARROW_VERSION}" + f" (version found: {self.installed_version})." + ) + raise exceptions.LegacyPyarrowError(msg) + return None + + return pyarrow + + +PYARROW_VERSIONS = PyarrowVersions() + + +class BQStorageVersions: + """Version comparisons for google-cloud-bigqueyr-storage package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) + + return self._installed_version # type: ignore + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). + + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def try_import(self, raise_if_error: bool = False) -> Any: + """Tries to import the bigquery_storage module, and returns results + accordingly. It also verifies the module version is recent enough. + + If the import succeeds, returns the ``bigquery_storage`` module. + + If the import fails, + returns ``None`` when ``raise_if_error == False``, + raises Error when ``raise_if_error == True``. + + Returns: + The ``bigquery_storage`` module or ``None``. + + Raises: + exceptions.BigQueryStorageNotFoundError: + If google-cloud-bigquery-storage is not installed + exceptions.LegacyBigQueryStorageError: + If google-cloud-bigquery-storage package is outdated + """ + try: + from google.cloud import bigquery_storage # type: ignore + except ImportError: + if raise_if_error: + msg = ( + "Package google-cloud-bigquery-storage not found. " + "Install google-cloud-bigquery-storage version >= " + f"{_MIN_BQ_STORAGE_VERSION}." + ) + raise exceptions.BigQueryStorageNotFoundError(msg) + return None + + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + if raise_if_error: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, " + f"please upgrade it to version >= {_MIN_BQ_STORAGE_VERSION} " + f"(version found: {self.installed_version})." 
+ ) + raise exceptions.LegacyBigQueryStorageError(msg) + return None + + return bigquery_storage + + +BQ_STORAGE_VERSIONS = BQStorageVersions() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b4783fc56..496015b21 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,7 +27,6 @@ import json import math import os -import packaging.version import tempfile import typing from typing import ( @@ -45,13 +44,6 @@ import uuid import warnings -try: - import pyarrow # type: ignore - - _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -73,25 +65,25 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers -from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id +from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import enums +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery import job from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _DEFAULT_HOST -from google.cloud.bigquery._http import Connection -from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError -from google.cloud.bigquery.opentelemetry_tracing import create_span -from google.cloud.bigquery import job +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, CopyJobConfig, @@ -105,6 +97,7 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref +from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery.query import _QueryResults from google.cloud.bigquery.retry import ( DEFAULT_JOB_RETRY, @@ -120,8 +113,8 @@ from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import RowIterator -from google.cloud.bigquery.format_options import ParquetOptions -from google.cloud.bigquery import _helpers + +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() TimeoutType = Union[float, None] ResumableTimeoutType = Union[ @@ -159,9 +152,6 @@ TIMEOUT_HEADER = "X-Server-Timeout" -# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 -_PYARROW_BAD_VERSIONS = 
frozenset([packaging.version.Version("2.0.0")]) - class Project(object): """Wrapper for resource describing a BigQuery project. @@ -476,7 +466,6 @@ def list_datasets( span_attributes = {"path": path} def api_request(*args, **kwargs): - return self._call_api( retry, span_name="BigQuery.listDatasets", @@ -555,29 +544,32 @@ def _ensure_bqstorage_client( An existing BigQuery Storage client instance. If ``None``, a new instance is created and returned. client_options: - Custom options used with a new BigQuery Storage client instance if one - is created. + Custom options used with a new BigQuery Storage client instance + if one is created. client_info: - The client info used with a new BigQuery Storage client instance if one - is created. + The client info used with a new BigQuery Storage client + instance if one is created. Returns: A BigQuery Storage API client. """ + try: - from google.cloud import bigquery_storage # type: ignore - except ImportError: + bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import( + raise_if_error=True + ) + except bq_exceptions.BigQueryStorageNotFoundError: warnings.warn( "Cannot create BigQuery Storage client, the dependency " "google-cloud-bigquery-storage is not installed." ) return None - - try: - BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) + except bq_exceptions.LegacyBigQueryStorageError as exc: + warnings.warn( + "Dependency google-cloud-bigquery-storage is outdated: " + str(exc) + ) return None + if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, @@ -2190,12 +2182,12 @@ def list_jobs( parent_job: Optional[Union[QueryJob, str]] = None, max_results: Optional[int] = None, page_token: Optional[str] = None, - all_users: bool = None, + all_users: Optional[bool] = None, state_filter: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - min_creation_time: datetime.datetime = None, - max_creation_time: datetime.datetime = None, + min_creation_time: Optional[datetime.datetime] = None, + max_creation_time: Optional[datetime.datetime] = None, page_size: Optional[int] = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2686,18 +2678,7 @@ def load_table_from_dataframe( os.close(tmpfd) try: - if new_job_config.source_format == job.SourceFormat.PARQUET: - if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: - msg = ( - "Loading dataframe data in PARQUET format with pyarrow " - f"{_PYARROW_VERSION} can result in data corruption. It is " - "therefore *strongly* advised to use a different pyarrow " - "version or a different source format. 
" - "See: https://github.com/googleapis/python-bigquery/issues/781" - ) - warnings.warn(msg, category=RuntimeWarning) - if new_job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -2716,13 +2697,12 @@ def load_table_from_dataframe( compression=parquet_compression, **( {"use_compliant_nested_type": True} - if _helpers.PYARROW_VERSIONS.use_compliant_nested_type + if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type else {} ), ) else: - dataframe.to_csv( tmppath, index=False, @@ -3427,7 +3407,7 @@ def insert_rows( self, table: Union[Table, TableReference, str], rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]], - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, **kwargs, ) -> Sequence[Dict[str, Any]]: """Insert rows into a table via the streaming API. @@ -3503,7 +3483,7 @@ def insert_rows_from_dataframe( self, table: Union[Table, TableReference, str], dataframe, - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, chunk_size: int = 500, **kwargs: Dict, ) -> Sequence[Sequence[dict]]: @@ -3566,8 +3546,8 @@ def insert_rows_json( row_ids: Union[ Iterable[Optional[str]], AutoRowIDs, None ] = AutoRowIDs.GENERATE_UUID, - skip_invalid_rows: bool = None, - ignore_unknown_values: bool = None, + skip_invalid_rows: Optional[bool] = None, + ignore_unknown_values: Optional[bool] = None, template_suffix: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, @@ -3758,7 +3738,7 @@ def list_partitions( def list_rows( self, table: Union[Table, TableListItem, TableReference, str], - selected_fields: Sequence[SchemaField] = None, + selected_fields: Optional[Sequence[SchemaField]] = None, max_results: Optional[int] = None, page_token: Optional[str] = None, start_index: Optional[int] = None, @@ -3871,7 +3851,7 @@ def _list_rows_from_query_results( project: str, schema: SchemaField, total_rows: Optional[int] = None, - destination: Union[Table, TableReference, TableListItem, str] = None, + destination: Optional[Union[Table, TableReference, TableListItem, str]] = None, max_results: Optional[int] = None, start_index: Optional[int] = None, page_size: Optional[int] = None, diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index a9c1cd884..af94784a4 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -454,7 +454,6 @@ def __ne__(self, other): return not self == other def __repr__(self): - return f"" def _key(self): @@ -502,9 +501,7 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": if len(entry) != 0: raise ValueError("Entry has unexpected keys remaining.", entry) - config = cls(role, entity_type, entity_id) - config._properties = copy.deepcopy(resource) - return config + return cls(role, entity_type, entity_id) class Dataset(object): @@ -528,6 +525,7 @@ class Dataset(object): "friendly_name": "friendlyName", "default_encryption_configuration": "defaultEncryptionConfiguration", "storage_billing_model": "storageBillingModel", + "default_rounding_mode": "defaultRoundingMode", } def __init__(self, dataset_ref) -> None: @@ -535,6 +533,43 @@ def __init__(self, dataset_ref) -> None: dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} + @property + def 
default_rounding_mode(self): + """Union[str, None]: defaultRoundingMode of the dataset as set by the user + (defaults to :data:`None`). + + Set the value to one of ``'ROUND_HALF_AWAY_FROM_ZERO'``, ``'ROUND_HALF_EVEN'``, or + ``'ROUNDING_MODE_UNSPECIFIED'``. + + See `default rounding mode + `_ + in REST API docs and `updating the default rounding model + `_ + guide. + + Raises: + ValueError: for invalid value types. + """ + return self._properties.get("defaultRoundingMode") + + @default_rounding_mode.setter + def default_rounding_mode(self, value): + possible_values = [ + "ROUNDING_MODE_UNSPECIFIED", + "ROUND_HALF_AWAY_FROM_ZERO", + "ROUND_HALF_EVEN", + ] + if not isinstance(value, str) and value is not None: + raise ValueError("Pass a string, or None") + if value is None: + self._properties["defaultRoundingMode"] = "ROUNDING_MODE_UNSPECIFIED" + if value not in possible_values and value is not None: + raise ValueError( + f'rounding mode needs to be one of {",".join(possible_values)}' + ) + if value: + self._properties["defaultRoundingMode"] = value + @property def project(self): """str: Project ID of the project bound to the dataset.""" diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py index 2bab97fea..e94a6c832 100644 --- a/google/cloud/bigquery/exceptions.py +++ b/google/cloud/bigquery/exceptions.py @@ -23,3 +23,9 @@ class LegacyBigQueryStorageError(BigQueryError): class LegacyPyarrowError(BigQueryError): """Raised when too old a version of pyarrow package is detected at runtime.""" + + +class BigQueryStorageNotFoundError(BigQueryError): + """Raised when BigQuery Storage extra is not installed when trying to + import it. + """ diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 429e33e7e..57186acbc 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1231,7 +1231,6 @@ def referenced_tables(self): datasets_by_project_name = {} for table in self._job_statistics().get("referencedTables", ()): - t_project = table["projectId"] ds_id = table["datasetId"] @@ -1694,7 +1693,7 @@ def to_arrow( def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, @@ -1880,7 +1879,7 @@ def to_dataframe( def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index f92f77541..2a3583c66 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -104,6 +104,8 @@ import google.auth # type: ignore from google.cloud import bigquery import google.cloud.bigquery.dataset +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.magics import line_arg_parser as lap @@ -744,12 +746,31 @@ def _split_args_line(line): def _make_bqstorage_client(client, use_bqstorage_api, client_options): + """Creates a BigQuery Storage client. + + Args: + client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client. 
+ use_bqstorage_api (bool): whether BigQuery Storage API is used or not. + client_options (:class:`google.api_core.client_options.ClientOptions`): + Custom options used with a new BigQuery Storage client instance + if one is created. + + Raises: + ImportError: if google-cloud-bigquery-storage is not installed, or + grpcio package is not installed. + + + Returns: + None: if ``use_bqstorage_api == False``, or google-cloud-bigquery-storage + is outdated. + BigQuery Storage Client: + """ if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage # type: ignore # noqa: F401 - except ImportError as err: + _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) + except exceptions.BigQueryStorageNotFoundError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " "the missing google-cloud-bigquery-storage and pyarrow packages " @@ -757,6 +778,8 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): "the --use_rest_api magic option." ) raise customized_error from err + except exceptions.LegacyBigQueryStorageError: + pass try: from google.api_core.gapic_v1 import client_info as gapic_client_info diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 4d2bc346c..45a88ab22 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -16,6 +16,8 @@ """Define resources for the BigQuery ML Models API.""" +from __future__ import annotations # type: ignore + import copy import datetime import typing @@ -184,6 +186,21 @@ def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: standard_sql.StandardSqlField.from_api_repr(column) for column in resource ] + @property + def transform_columns(self) -> Sequence[TransformColumn]: + """The input feature columns that were used to train this model. + The output transform columns used to train this model. + + See REST API: + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn + + Read-only. + """ + resources: Sequence[Dict[str, Any]] = typing.cast( + Sequence[Dict[str, Any]], self._properties.get("transformColumns", []) + ) + return [TransformColumn(resource) for resource in resources] + @property def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: """Label columns that were used to train this model. @@ -434,6 +451,60 @@ def __repr__(self): ) +class TransformColumn: + """TransformColumn represents a transform column feature. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn + + Args: + resource: + A dictionary representing a transform column feature. + """ + + def __init__(self, resource: Dict[str, Any]): + self._properties = resource + + @property + def name(self) -> Optional[str]: + """Name of the column.""" + return self._properties.get("name") + + @property + def type_(self) -> Optional[standard_sql.StandardSqlDataType]: + """Data type of the column after the transform. + + Returns: + Optional[google.cloud.bigquery.standard_sql.StandardSqlDataType]: + Data type of the column. 
+ """ + type_json = self._properties.get("type") + if type_json is None: + return None + return standard_sql.StandardSqlDataType.from_api_repr(type_json) + + @property + def transform_sql(self) -> Optional[str]: + """The SQL expression used in the column transform.""" + return self._properties.get("transformSql") + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "TransformColumn": + """Constructs a transform column feature given its API representation + + Args: + resource: + Transform column feature representation from the API + + Returns: + Transform column feature parsed from ``resource``. + """ + this = cls({}) + resource = copy.deepcopy(resource) + this._properties = resource + return this + + def _model_arg_to_model_ref(value, default_project=None): """Helper to convert a string or Model to ModelReference. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 462447d51..dcba10428 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -60,14 +60,15 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery._tqdm_helpers import get_progress_bar +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery._tqdm_helpers import get_progress_bar -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -1593,7 +1594,7 @@ def _is_completely_cached(self): return self._first_page_response.get(self._next_token) is None def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): - """Returns if the BigQuery Storage API can be used. + """Returns True if the BigQuery Storage API can be used. Returns: bool @@ -1610,13 +1611,10 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: + _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True) + except bq_exceptions.BigQueryStorageNotFoundError: return False - - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: + except bq_exceptions.LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False @@ -1855,7 +1853,7 @@ def to_arrow( def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. 
@@ -1931,7 +1929,7 @@ def to_dataframe_iterable( def to_dataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_as_object: bool = False, @@ -2229,7 +2227,7 @@ def __can_cast_timestamp_ns(column): def to_geodataframe( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, - dtypes: Dict[str, Any] = None, + dtypes: Optional[Dict[str, Any]] = None, progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ea71d198b..ee029aced 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.12.0" +__version__ = "3.13.0" diff --git a/mypy.ini b/mypy.ini index 4505b4854..beaa679a8 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,3 +1,3 @@ [mypy] -python_version = 3.6 +python_version = 3.8 namespace_packages = True diff --git a/noxfile.py b/noxfile.py index 93616485f..a2b7a6843 100644 --- a/noxfile.py +++ b/noxfile.py @@ -22,10 +22,11 @@ import nox -MYPY_VERSION = "mypy==0.910" +MYPY_VERSION = "mypy==1.6.1" PYTYPE_VERSION = "pytype==2021.4.9" -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( + "benchmark", "docs", "google", "samples", @@ -136,7 +137,7 @@ def mypy(session): "types-requests", "types-setuptools", ) - session.run("mypy", "google/cloud") + session.run("mypy", "google/cloud", "--show-traceback") @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -381,6 +382,7 @@ def lint(session): session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "samples")) session.run("flake8", os.path.join("docs", "snippets.py")) + session.run("flake8", "benchmark") session.run("black", "--check", *BLACK_PATHS) diff --git a/samples/add_empty_column.py b/samples/add_empty_column.py index 6d449d6e2..5541a6738 100644 --- a/samples/add_empty_column.py +++ b/samples/add_empty_column.py @@ -14,7 +14,6 @@ def add_empty_column(table_id: str) -> None: - # [START bigquery_add_empty_column] from google.cloud import bigquery diff --git a/samples/browse_table_data.py b/samples/browse_table_data.py index 74b903aa3..2fba65aeb 100644 --- a/samples/browse_table_data.py +++ b/samples/browse_table_data.py @@ -14,7 +14,6 @@ def browse_table_data(table_id: str) -> None: - # [START bigquery_browse_table] from google.cloud import bigquery diff --git a/samples/client_list_jobs.py b/samples/client_list_jobs.py index 7f1e39cb8..335d2ecec 100644 --- a/samples/client_list_jobs.py +++ b/samples/client_list_jobs.py @@ -14,7 +14,6 @@ def client_list_jobs() -> None: - # [START bigquery_list_jobs] from google.cloud import bigquery diff --git a/samples/client_load_partitioned_table.py b/samples/client_load_partitioned_table.py index 9956f3f00..cfdf24819 100644 --- a/samples/client_load_partitioned_table.py +++ b/samples/client_load_partitioned_table.py @@ -14,7 +14,6 @@ def client_load_partitioned_table(table_id: str) -> None: - # [START bigquery_load_table_partitioned] from google.cloud import bigquery diff --git a/samples/client_query.py b/samples/client_query.py index 091d3f98b..4df051ee2 100644 --- a/samples/client_query.py +++ b/samples/client_query.py @@ -14,7 +14,6 @@ def 
client_query() -> None: - # [START bigquery_query] from google.cloud import bigquery diff --git a/samples/client_query_add_column.py b/samples/client_query_add_column.py index 2da200bc5..ec14087fb 100644 --- a/samples/client_query_add_column.py +++ b/samples/client_query_add_column.py @@ -14,7 +14,6 @@ def client_query_add_column(table_id: str) -> None: - # [START bigquery_add_column_query_append] from google.cloud import bigquery diff --git a/samples/client_query_batch.py b/samples/client_query_batch.py index df164d1be..5c55e278e 100644 --- a/samples/client_query_batch.py +++ b/samples/client_query_batch.py @@ -19,7 +19,6 @@ def client_query_batch() -> "bigquery.QueryJob": - # [START bigquery_query_batch] from google.cloud import bigquery diff --git a/samples/client_query_destination_table.py b/samples/client_query_destination_table.py index b200f1cc6..486576fea 100644 --- a/samples/client_query_destination_table.py +++ b/samples/client_query_destination_table.py @@ -14,7 +14,6 @@ def client_query_destination_table(table_id: str) -> None: - # [START bigquery_query_destination_table] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py index c4ab305f5..de9fff2d0 100644 --- a/samples/client_query_destination_table_clustered.py +++ b/samples/client_query_destination_table_clustered.py @@ -14,7 +14,6 @@ def client_query_destination_table_clustered(table_id: str) -> None: - # [START bigquery_query_clustered_table] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_cmek.py b/samples/client_query_destination_table_cmek.py index 0fd44d189..040c96e22 100644 --- a/samples/client_query_destination_table_cmek.py +++ b/samples/client_query_destination_table_cmek.py @@ -14,7 +14,6 @@ def client_query_destination_table_cmek(table_id: str, kms_key_name: str) -> None: - # [START bigquery_query_destination_table_cmek] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_legacy.py b/samples/client_query_destination_table_legacy.py index ee45d9a01..37f50fdb4 100644 --- a/samples/client_query_destination_table_legacy.py +++ b/samples/client_query_destination_table_legacy.py @@ -14,7 +14,6 @@ def client_query_destination_table_legacy(table_id: str) -> None: - # [START bigquery_query_legacy_large_results] from google.cloud import bigquery diff --git a/samples/client_query_dry_run.py b/samples/client_query_dry_run.py index 418b43cb5..bb4893c2a 100644 --- a/samples/client_query_dry_run.py +++ b/samples/client_query_dry_run.py @@ -19,7 +19,6 @@ def client_query_dry_run() -> "bigquery.QueryJob": - # [START bigquery_query_dry_run] from google.cloud import bigquery diff --git a/samples/client_query_legacy_sql.py b/samples/client_query_legacy_sql.py index c054e1f28..44917e4e0 100644 --- a/samples/client_query_legacy_sql.py +++ b/samples/client_query_legacy_sql.py @@ -14,7 +14,6 @@ def client_query_legacy_sql() -> None: - # [START bigquery_query_legacy] from google.cloud import bigquery diff --git a/samples/client_query_relax_column.py b/samples/client_query_relax_column.py index c96a1e7aa..22ecb33d1 100644 --- a/samples/client_query_relax_column.py +++ b/samples/client_query_relax_column.py @@ -14,7 +14,6 @@ def client_query_relax_column(table_id: str) -> None: - # [START bigquery_relax_column_query_append] from google.cloud import bigquery diff --git a/samples/client_query_w_array_params.py b/samples/client_query_w_array_params.py index 
669713182..25592a94a 100644 --- a/samples/client_query_w_array_params.py +++ b/samples/client_query_w_array_params.py @@ -14,7 +14,6 @@ def client_query_w_array_params() -> None: - # [START bigquery_query_params_arrays] from google.cloud import bigquery diff --git a/samples/client_query_w_named_params.py b/samples/client_query_w_named_params.py index f42be1dc8..6dd72d44f 100644 --- a/samples/client_query_w_named_params.py +++ b/samples/client_query_w_named_params.py @@ -14,7 +14,6 @@ def client_query_w_named_params() -> None: - # [START bigquery_query_params_named] from google.cloud import bigquery diff --git a/samples/client_query_w_positional_params.py b/samples/client_query_w_positional_params.py index b088b305e..9cdde69ca 100644 --- a/samples/client_query_w_positional_params.py +++ b/samples/client_query_w_positional_params.py @@ -14,7 +14,6 @@ def client_query_w_positional_params() -> None: - # [START bigquery_query_params_positional] from google.cloud import bigquery diff --git a/samples/client_query_w_struct_params.py b/samples/client_query_w_struct_params.py index 6c5b78113..6b68e78ed 100644 --- a/samples/client_query_w_struct_params.py +++ b/samples/client_query_w_struct_params.py @@ -14,7 +14,6 @@ def client_query_w_struct_params() -> None: - # [START bigquery_query_params_structs] from google.cloud import bigquery diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index 07d64cc94..c1ade8782 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -14,7 +14,6 @@ def client_query_w_timestamp_params() -> None: - # [START bigquery_query_params_timestamps] import datetime diff --git a/samples/copy_table.py b/samples/copy_table.py index 8c6153fef..3ae6e9ebe 100644 --- a/samples/copy_table.py +++ b/samples/copy_table.py @@ -14,7 +14,6 @@ def copy_table(source_table_id: str, destination_table_id: str) -> None: - # [START bigquery_copy_table] from google.cloud import bigquery diff --git a/samples/copy_table_cmek.py b/samples/copy_table_cmek.py index f2e8a90f9..f03053fab 100644 --- a/samples/copy_table_cmek.py +++ b/samples/copy_table_cmek.py @@ -14,7 +14,6 @@ def copy_table_cmek(dest_table_id: str, orig_table_id: str, kms_key_name: str) -> None: - # [START bigquery_copy_table_cmek] from google.cloud import bigquery diff --git a/samples/copy_table_multiple_source.py b/samples/copy_table_multiple_source.py index 1163b1664..509b8951b 100644 --- a/samples/copy_table_multiple_source.py +++ b/samples/copy_table_multiple_source.py @@ -16,7 +16,6 @@ def copy_table_multiple_source(dest_table_id: str, table_ids: Sequence[str]) -> None: - # [START bigquery_copy_table_multiple_source] from google.cloud import bigquery diff --git a/samples/create_dataset.py b/samples/create_dataset.py index dea91798d..7f645933a 100644 --- a/samples/create_dataset.py +++ b/samples/create_dataset.py @@ -14,7 +14,6 @@ def create_dataset(dataset_id: str) -> None: - # [START bigquery_create_dataset] from google.cloud import bigquery diff --git a/samples/create_job.py b/samples/create_job.py index 129a08a1b..f335e2f7a 100644 --- a/samples/create_job.py +++ b/samples/create_job.py @@ -20,7 +20,6 @@ def create_job() -> "Union[LoadJob, CopyJob, ExtractJob, QueryJob]": - # [START bigquery_create_job] from google.cloud import bigquery diff --git a/samples/create_routine.py b/samples/create_routine.py index 96dc24210..8be1b6a99 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -19,7 +19,6 @@ def 
create_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_create_routine] from google.cloud import bigquery diff --git a/samples/create_routine_ddl.py b/samples/create_routine_ddl.py index 56c7cfe24..231d5a142 100644 --- a/samples/create_routine_ddl.py +++ b/samples/create_routine_ddl.py @@ -14,7 +14,6 @@ def create_routine_ddl(routine_id: str) -> None: - # [START bigquery_create_routine_ddl] from google.cloud import bigquery diff --git a/samples/create_table.py b/samples/create_table.py index eaac54696..7fda370ce 100644 --- a/samples/create_table.py +++ b/samples/create_table.py @@ -14,7 +14,6 @@ def create_table(table_id: str) -> None: - # [START bigquery_create_table] from google.cloud import bigquery diff --git a/samples/create_table_clustered.py b/samples/create_table_clustered.py index 1686c519a..a9ad43e59 100644 --- a/samples/create_table_clustered.py +++ b/samples/create_table_clustered.py @@ -19,7 +19,6 @@ def create_table_clustered(table_id: str) -> "bigquery.Table": - # [START bigquery_create_table_clustered] from google.cloud import bigquery diff --git a/samples/create_table_range_partitioned.py b/samples/create_table_range_partitioned.py index 4dc45ed58..128ab87d9 100644 --- a/samples/create_table_range_partitioned.py +++ b/samples/create_table_range_partitioned.py @@ -19,7 +19,6 @@ def create_table_range_partitioned(table_id: str) -> "bigquery.Table": - # [START bigquery_create_table_range_partitioned] from google.cloud import bigquery diff --git a/samples/dataset_exists.py b/samples/dataset_exists.py index 221899a65..784f86524 100644 --- a/samples/dataset_exists.py +++ b/samples/dataset_exists.py @@ -14,7 +14,6 @@ def dataset_exists(dataset_id: str) -> None: - # [START bigquery_dataset_exists] from google.cloud import bigquery from google.cloud.exceptions import NotFound diff --git a/samples/delete_dataset.py b/samples/delete_dataset.py index b340ed57a..9c7644db0 100644 --- a/samples/delete_dataset.py +++ b/samples/delete_dataset.py @@ -14,7 +14,6 @@ def delete_dataset(dataset_id: str) -> None: - # [START bigquery_delete_dataset] from google.cloud import bigquery diff --git a/samples/delete_dataset_labels.py b/samples/delete_dataset_labels.py index ec5df09c1..d5efdf4ea 100644 --- a/samples/delete_dataset_labels.py +++ b/samples/delete_dataset_labels.py @@ -19,7 +19,6 @@ def delete_dataset_labels(dataset_id: str) -> "bigquery.Dataset": - # [START bigquery_delete_label_dataset] from google.cloud import bigquery diff --git a/samples/delete_routine.py b/samples/delete_routine.py index 7362a5fea..604e7f730 100644 --- a/samples/delete_routine.py +++ b/samples/delete_routine.py @@ -14,7 +14,6 @@ def delete_routine(routine_id: str) -> None: - # [START bigquery_delete_routine] from google.cloud import bigquery diff --git a/samples/delete_table.py b/samples/delete_table.py index 9e7ee170a..a8ac4617a 100644 --- a/samples/delete_table.py +++ b/samples/delete_table.py @@ -14,7 +14,6 @@ def delete_table(table_id: str) -> None: - # [START bigquery_delete_table] from google.cloud import bigquery diff --git a/samples/download_public_data.py b/samples/download_public_data.py index a488bbbb5..cb2ebd1fd 100644 --- a/samples/download_public_data.py +++ b/samples/download_public_data.py @@ -14,7 +14,6 @@ def download_public_data() -> None: - # [START bigquery_pandas_public_data] from google.cloud import bigquery diff --git a/samples/download_public_data_sandbox.py b/samples/download_public_data_sandbox.py index ce5200b4e..e165a31ce 100644 --- 
a/samples/download_public_data_sandbox.py +++ b/samples/download_public_data_sandbox.py @@ -14,7 +14,6 @@ def download_public_data_sandbox() -> None: - # [START bigquery_pandas_public_data_sandbox] from google.cloud import bigquery diff --git a/samples/geography/insert_geojson.py b/samples/geography/insert_geojson.py index 2db407b55..9a6f6c413 100644 --- a/samples/geography/insert_geojson.py +++ b/samples/geography/insert_geojson.py @@ -18,7 +18,6 @@ def insert_geojson( override_values: Optional[Mapping[str, str]] = None ) -> Sequence[Dict[str, object]]: - if override_values is None: override_values = {} diff --git a/samples/geography/insert_wkt.py b/samples/geography/insert_wkt.py index 25c7ee727..2923d2596 100644 --- a/samples/geography/insert_wkt.py +++ b/samples/geography/insert_wkt.py @@ -18,7 +18,6 @@ def insert_wkt( override_values: Optional[Mapping[str, str]] = None ) -> Sequence[Dict[str, object]]: - if override_values is None: override_values = {} diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 714e032ad..9bc6ee32c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -42,4 +42,4 @@ Shapely==2.0.1 six==1.16.0 typing-extensions==4.7.1 typing-inspect==0.9.0 -urllib3==1.26.15 +urllib3==1.26.18 diff --git a/samples/get_dataset.py b/samples/get_dataset.py index 5654cbdce..1e4ad2904 100644 --- a/samples/get_dataset.py +++ b/samples/get_dataset.py @@ -14,7 +14,6 @@ def get_dataset(dataset_id: str) -> None: - # [START bigquery_get_dataset] from google.cloud import bigquery diff --git a/samples/get_dataset_labels.py b/samples/get_dataset_labels.py index d97ee3c01..8dc8b9430 100644 --- a/samples/get_dataset_labels.py +++ b/samples/get_dataset_labels.py @@ -14,7 +14,6 @@ def get_dataset_labels(dataset_id: str) -> None: - # [START bigquery_get_dataset_labels] from google.cloud import bigquery diff --git a/samples/get_routine.py b/samples/get_routine.py index 031d9a127..96e85acc9 100644 --- a/samples/get_routine.py +++ b/samples/get_routine.py @@ -19,7 +19,6 @@ def get_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_get_routine] from google.cloud import bigquery diff --git a/samples/get_table.py b/samples/get_table.py index 6195aaf9a..f71db7cee 100644 --- a/samples/get_table.py +++ b/samples/get_table.py @@ -14,7 +14,6 @@ def get_table(table_id: str) -> None: - # [START bigquery_get_table] from google.cloud import bigquery diff --git a/samples/label_dataset.py b/samples/label_dataset.py index a59743e5d..4fcc3dcd8 100644 --- a/samples/label_dataset.py +++ b/samples/label_dataset.py @@ -14,7 +14,6 @@ def label_dataset(dataset_id: str) -> None: - # [START bigquery_label_dataset] from google.cloud import bigquery diff --git a/samples/list_datasets.py b/samples/list_datasets.py index c1b6639a9..d9401e9ae 100644 --- a/samples/list_datasets.py +++ b/samples/list_datasets.py @@ -14,7 +14,6 @@ def list_datasets() -> None: - # [START bigquery_list_datasets] from google.cloud import bigquery diff --git a/samples/list_datasets_by_label.py b/samples/list_datasets_by_label.py index d1f264872..3a2bef632 100644 --- a/samples/list_datasets_by_label.py +++ b/samples/list_datasets_by_label.py @@ -14,7 +14,6 @@ def list_datasets_by_label() -> None: - # [START bigquery_list_datasets_by_label] from google.cloud import bigquery diff --git a/samples/list_routines.py b/samples/list_routines.py index bee7c23be..95ddd962e 100644 --- a/samples/list_routines.py +++ b/samples/list_routines.py @@ -14,7 +14,6 @@ def 
list_routines(dataset_id: str) -> None: - # [START bigquery_list_routines] from google.cloud import bigquery diff --git a/samples/list_tables.py b/samples/list_tables.py index df846961d..17c06370d 100644 --- a/samples/list_tables.py +++ b/samples/list_tables.py @@ -14,7 +14,6 @@ def list_tables(dataset_id: str) -> None: - # [START bigquery_list_tables] from google.cloud import bigquery diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py index 87b6c76ce..749746882 100644 --- a/samples/load_table_clustered.py +++ b/samples/load_table_clustered.py @@ -19,7 +19,6 @@ def load_table_clustered(table_id: str) -> "bigquery.Table": - # [START bigquery_load_table_clustered] from google.cloud import bigquery diff --git a/samples/load_table_dataframe.py b/samples/load_table_dataframe.py index db4c131f2..2c668d183 100644 --- a/samples/load_table_dataframe.py +++ b/samples/load_table_dataframe.py @@ -19,7 +19,6 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": - # [START bigquery_load_table_dataframe] import datetime diff --git a/samples/load_table_file.py b/samples/load_table_file.py index 00226eb3c..838c3b105 100644 --- a/samples/load_table_file.py +++ b/samples/load_table_file.py @@ -19,7 +19,6 @@ def load_table_file(file_path: str, table_id: str) -> "bigquery.Table": - # [START bigquery_load_from_file] from google.cloud import bigquery diff --git a/samples/load_table_uri_autodetect_csv.py b/samples/load_table_uri_autodetect_csv.py index c412c63f1..ca4590581 100644 --- a/samples/load_table_uri_autodetect_csv.py +++ b/samples/load_table_uri_autodetect_csv.py @@ -14,7 +14,6 @@ def load_table_uri_autodetect_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv_autodetect] from google.cloud import bigquery diff --git a/samples/load_table_uri_autodetect_json.py b/samples/load_table_uri_autodetect_json.py index 9d0bc3f22..00e8dc1b2 100644 --- a/samples/load_table_uri_autodetect_json.py +++ b/samples/load_table_uri_autodetect_json.py @@ -14,7 +14,6 @@ def load_table_uri_autodetect_json(table_id: str) -> None: - # [START bigquery_load_table_gcs_json_autodetect] from google.cloud import bigquery diff --git a/samples/load_table_uri_avro.py b/samples/load_table_uri_avro.py index e9f7c39ed..a0e8c86a6 100644 --- a/samples/load_table_uri_avro.py +++ b/samples/load_table_uri_avro.py @@ -14,7 +14,6 @@ def load_table_uri_avro(table_id: str) -> None: - # [START bigquery_load_table_gcs_avro] from google.cloud import bigquery diff --git a/samples/load_table_uri_cmek.py b/samples/load_table_uri_cmek.py index 4dfc0d3b4..d54422028 100644 --- a/samples/load_table_uri_cmek.py +++ b/samples/load_table_uri_cmek.py @@ -14,7 +14,6 @@ def load_table_uri_cmek(table_id: str, kms_key_name: str) -> None: - # [START bigquery_load_table_gcs_json_cmek] from google.cloud import bigquery diff --git a/samples/load_table_uri_csv.py b/samples/load_table_uri_csv.py index 9cb8c6f20..d660a2195 100644 --- a/samples/load_table_uri_csv.py +++ b/samples/load_table_uri_csv.py @@ -14,7 +14,6 @@ def load_table_uri_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv] from google.cloud import bigquery diff --git a/samples/load_table_uri_orc.py b/samples/load_table_uri_orc.py index 7babd2630..c09129216 100644 --- a/samples/load_table_uri_orc.py +++ b/samples/load_table_uri_orc.py @@ -14,7 +14,6 @@ def load_table_uri_orc(table_id: str) -> None: - # [START bigquery_load_table_gcs_orc] from google.cloud import bigquery diff --git a/samples/load_table_uri_truncate_avro.py 
b/samples/load_table_uri_truncate_avro.py index 51c6636fa..307a4e4de 100644 --- a/samples/load_table_uri_truncate_avro.py +++ b/samples/load_table_uri_truncate_avro.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_avro(table_id: str) -> None: - # [START bigquery_load_table_gcs_avro_truncate] import io diff --git a/samples/load_table_uri_truncate_csv.py b/samples/load_table_uri_truncate_csv.py index ee8b34043..4bfd306cd 100644 --- a/samples/load_table_uri_truncate_csv.py +++ b/samples/load_table_uri_truncate_csv.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_csv(table_id: str) -> None: - # [START bigquery_load_table_gcs_csv_truncate] import io diff --git a/samples/load_table_uri_truncate_json.py b/samples/load_table_uri_truncate_json.py index e85e0808e..a05a3eda0 100644 --- a/samples/load_table_uri_truncate_json.py +++ b/samples/load_table_uri_truncate_json.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_json(table_id: str) -> None: - # [START bigquery_load_table_gcs_json_truncate] import io diff --git a/samples/load_table_uri_truncate_orc.py b/samples/load_table_uri_truncate_orc.py index c730099d1..1c704b745 100644 --- a/samples/load_table_uri_truncate_orc.py +++ b/samples/load_table_uri_truncate_orc.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_orc(table_id: str) -> None: - # [START bigquery_load_table_gcs_orc_truncate] import io diff --git a/samples/load_table_uri_truncate_parquet.py b/samples/load_table_uri_truncate_parquet.py index 3a0a55c8a..d74f79910 100644 --- a/samples/load_table_uri_truncate_parquet.py +++ b/samples/load_table_uri_truncate_parquet.py @@ -14,7 +14,6 @@ def load_table_uri_truncate_parquet(table_id: str) -> None: - # [START bigquery_load_table_gcs_parquet_truncate] import io diff --git a/samples/query_external_gcs_temporary_table.py b/samples/query_external_gcs_temporary_table.py index 9bcb86aab..d622ab1a3 100644 --- a/samples/query_external_gcs_temporary_table.py +++ b/samples/query_external_gcs_temporary_table.py @@ -14,7 +14,6 @@ def query_external_gcs_temporary_table() -> None: - # [START bigquery_query_external_gcs_temp] from google.cloud import bigquery diff --git a/samples/query_external_sheets_permanent_table.py b/samples/query_external_sheets_permanent_table.py index a5855e66a..f23f44259 100644 --- a/samples/query_external_sheets_permanent_table.py +++ b/samples/query_external_sheets_permanent_table.py @@ -14,7 +14,6 @@ def query_external_sheets_permanent_table(dataset_id: str) -> None: - # [START bigquery_query_external_sheets_perm] from google.cloud import bigquery import google.auth diff --git a/samples/query_external_sheets_temporary_table.py b/samples/query_external_sheets_temporary_table.py index 944d3b826..876e4cc1a 100644 --- a/samples/query_external_sheets_temporary_table.py +++ b/samples/query_external_sheets_temporary_table.py @@ -14,7 +14,6 @@ def query_external_sheets_temporary_table() -> None: - # [START bigquery_query_external_sheets_temp] # [START bigquery_auth_drive_scope] from google.cloud import bigquery diff --git a/samples/query_no_cache.py b/samples/query_no_cache.py index f39c01dbc..7501b7fc0 100644 --- a/samples/query_no_cache.py +++ b/samples/query_no_cache.py @@ -14,7 +14,6 @@ def query_no_cache() -> None: - # [START bigquery_query_no_cache] from google.cloud import bigquery diff --git a/samples/query_pagination.py b/samples/query_pagination.py index 2e1654050..7ccaecff7 100644 --- a/samples/query_pagination.py +++ b/samples/query_pagination.py @@ -14,7 +14,6 @@ def query_pagination() -> None: - # [START 
bigquery_query_pagination] from google.cloud import bigquery diff --git a/samples/query_to_arrow.py b/samples/query_to_arrow.py index 157a93638..f1afc7c94 100644 --- a/samples/query_to_arrow.py +++ b/samples/query_to_arrow.py @@ -19,7 +19,6 @@ def query_to_arrow() -> "pyarrow.Table": - # [START bigquery_query_to_arrow] from google.cloud import bigquery diff --git a/samples/snippets/authenticate_service_account_test.py b/samples/snippets/authenticate_service_account_test.py index 4b5711f80..fbdd2d064 100644 --- a/samples/snippets/authenticate_service_account_test.py +++ b/samples/snippets/authenticate_service_account_test.py @@ -17,7 +17,7 @@ import google.auth -import authenticate_service_account +import authenticate_service_account # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py index cae870486..e2220fb54 100644 --- a/samples/snippets/authorized_view_tutorial_test.py +++ b/samples/snippets/authorized_view_tutorial_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import authorized_view_tutorial +import authorized_view_tutorial # type: ignore @pytest.fixture(scope="module") diff --git a/samples/snippets/create_partitioned_table.py b/samples/snippets/create_partitioned_table.py new file mode 100644 index 000000000..0277d7d0f --- /dev/null +++ b/samples/snippets/create_partitioned_table.py @@ -0,0 +1,45 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_partitioned_table(table_id): + your_fully_qualified_table_id = table_id + + # [START bigquery_create_table_partitioned] + from google.cloud import bigquery + + client = bigquery.Client() + + # Use format "your-project.your_dataset.your_table_name" for table_id + table_id = your_fully_qualified_table_id + schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), + ] + table = bigquery.Table(table_id, schema=schema) + table.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", # name of column to use for partitioning + expiration_ms=1000 * 60 * 60 * 24 * 90, + ) # 90 days + + table = client.create_table(table) + + print( + f"Created table {table.project}.{table.dataset_id}.{table.table_id}, " + f"partitioned on column {table.time_partitioning.field}." + ) + # [END bigquery_create_table_partitioned] + return table diff --git a/samples/snippets/create_partitioned_table_test.py b/samples/snippets/create_partitioned_table_test.py new file mode 100644 index 000000000..e4d7ec20e --- /dev/null +++ b/samples/snippets/create_partitioned_table_test.py @@ -0,0 +1,34 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +import create_partitioned_table # type: ignore + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_partitioned_table( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + table = create_partitioned_table.create_partitioned_table(random_table_id) + + out, _ = capsys.readouterr() + assert "Created" in out + assert random_table_id in out + + assert table.time_partitioning.type_ == "DAY" + assert table.time_partitioning.field == "date" diff --git a/samples/snippets/create_table_cmek_test.py b/samples/snippets/create_table_cmek_test.py index 429baf3fd..e8626b84c 100644 --- a/samples/snippets/create_table_cmek_test.py +++ b/samples/snippets/create_table_cmek_test.py @@ -14,7 +14,7 @@ import typing -import create_table_cmek +import create_table_cmek # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - kms_key_name = ( "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" ) diff --git a/samples/snippets/create_table_external_data_configuration_test.py b/samples/snippets/create_table_external_data_configuration_test.py index 7bbcde32b..bf81a75f9 100644 --- a/samples/snippets/create_table_external_data_configuration_test.py +++ b/samples/snippets/create_table_external_data_configuration_test.py @@ -14,7 +14,7 @@ import typing -import create_table_external_data_configuration +import create_table_external_data_configuration # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_create_table_external_data_configuration( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - create_table_external_data_configuration.create_table_external_data_configuration( random_table_id ) diff --git a/samples/snippets/create_table_external_hive_partitioned_test.py b/samples/snippets/create_table_external_hive_partitioned_test.py index 37deb8b12..5b8cbe1c3 100644 --- a/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/samples/snippets/create_table_external_hive_partitioned_test.py @@ -14,7 +14,7 @@ import typing -import create_table_external_hive_partitioned +import create_table_external_hive_partitioned # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/create_table_schema_from_json_test.py b/samples/snippets/create_table_schema_from_json_test.py index e99b92672..e725d3ccf 100644 --- a/samples/snippets/create_table_schema_from_json_test.py +++ b/samples/snippets/create_table_schema_from_json_test.py @@ -14,7 +14,7 @@ import typing -import create_table_schema_from_json +import create_table_schema_from_json # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - create_table_schema_from_json.create_table(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/create_table_snapshot_test.py b/samples/snippets/create_table_snapshot_test.py index 
f1d8d0f7b..17ef24d26 100644 --- a/samples/snippets/create_table_snapshot_test.py +++ b/samples/snippets/create_table_snapshot_test.py @@ -14,7 +14,7 @@ import typing -import create_table_snapshot +import create_table_snapshot # type: ignore if typing.TYPE_CHECKING: import pytest @@ -25,7 +25,6 @@ def test_create_table_snapshot( table_id: str, random_table_id: str, ) -> None: - create_table_snapshot.create_table_snapshot(table_id, random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/dataset_access_test.py b/samples/snippets/dataset_access_test.py index cc6a9af61..e3a53b084 100644 --- a/samples/snippets/dataset_access_test.py +++ b/samples/snippets/dataset_access_test.py @@ -14,8 +14,8 @@ import typing -import revoke_dataset_access -import update_dataset_access +import revoke_dataset_access # type: ignore +import update_dataset_access # type: ignore if typing.TYPE_CHECKING: from google.cloud import bigquery diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py index ac9d52dcf..88eeae1ed 100644 --- a/samples/snippets/delete_job_test.py +++ b/samples/snippets/delete_job_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import delete_job +import delete_job # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/delete_label_table_test.py b/samples/snippets/delete_label_table_test.py index 54acae77f..01e538ae3 100644 --- a/samples/snippets/delete_label_table_test.py +++ b/samples/snippets/delete_label_table_test.py @@ -14,7 +14,7 @@ import typing -import delete_label_table +import delete_label_table # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_delete_label_table( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - table = delete_label_table.delete_label_table(table_id, "color") out, _ = capsys.readouterr() diff --git a/samples/snippets/get_table_labels_test.py b/samples/snippets/get_table_labels_test.py index f922e728c..e910d6a65 100644 --- a/samples/snippets/get_table_labels_test.py +++ b/samples/snippets/get_table_labels_test.py @@ -16,7 +16,7 @@ from google.cloud import bigquery -import get_table_labels +import get_table_labels # type: ignore if typing.TYPE_CHECKING: import pytest @@ -42,7 +42,6 @@ def test_get_table_labels_no_label( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - get_table_labels.get_table_labels(table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/get_table_make_schema_test.py b/samples/snippets/get_table_make_schema_test.py index 424f16e39..b1a1623bb 100644 --- a/samples/snippets/get_table_make_schema_test.py +++ b/samples/snippets/get_table_make_schema_test.py @@ -14,7 +14,7 @@ import typing -import get_table_make_schema +import get_table_make_schema # type: ignore if typing.TYPE_CHECKING: import pathlib diff --git a/samples/snippets/label_table_test.py b/samples/snippets/label_table_test.py index a77fb4b75..49f5406ab 100644 --- a/samples/snippets/label_table_test.py +++ b/samples/snippets/label_table_test.py @@ -14,7 +14,7 @@ import typing -import label_table +import label_table # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_label_table( capsys: "pytest.CaptureFixture[str]", table_id: str, ) -> None: - label_table.label_table(table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/load_table_schema_from_json_test.py b/samples/snippets/load_table_schema_from_json_test.py index 267a6786c..745793cd7 100644 --- 
a/samples/snippets/load_table_schema_from_json_test.py +++ b/samples/snippets/load_table_schema_from_json_test.py @@ -14,7 +14,7 @@ import typing -import load_table_schema_from_json +import load_table_schema_from_json # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_load_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - load_table_schema_from_json.load_table(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py index 552fa2e35..e19378a04 100644 --- a/samples/snippets/load_table_uri_firestore_test.py +++ b/samples/snippets/load_table_uri_firestore_test.py @@ -14,7 +14,7 @@ import typing -import load_table_uri_firestore +import load_table_uri_firestore # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/manage_job_test.py b/samples/snippets/manage_job_test.py index 630be365b..2ef4be2e0 100644 --- a/samples/snippets/manage_job_test.py +++ b/samples/snippets/manage_job_test.py @@ -15,8 +15,8 @@ from google.cloud import bigquery import pytest -import manage_job_cancel -import manage_job_get +import manage_job_cancel # type: ignore +import manage_job_get # type: ignore def test_manage_job(capsys: pytest.CaptureFixture[str]) -> None: diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py index 70869346f..59e08131e 100644 --- a/samples/snippets/materialized_view_test.py +++ b/samples/snippets/materialized_view_test.py @@ -20,7 +20,7 @@ from google.cloud import bigquery import pytest -import materialized_view +import materialized_view # type: ignore def temp_suffix() -> str: diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py index f56738528..7f24ca5cb 100644 --- a/samples/snippets/natality_tutorial_test.py +++ b/samples/snippets/natality_tutorial_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import natality_tutorial +import natality_tutorial # type: ignore @pytest.fixture(scope="module") diff --git a/samples/snippets/nested_repeated_schema_test.py b/samples/snippets/nested_repeated_schema_test.py index 0386fc8fb..67815dcf6 100644 --- a/samples/snippets/nested_repeated_schema_test.py +++ b/samples/snippets/nested_repeated_schema_test.py @@ -14,7 +14,7 @@ import typing -import nested_repeated_schema +import nested_repeated_schema # type: ignore if typing.TYPE_CHECKING: import pytest @@ -24,7 +24,6 @@ def test_create_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, ) -> None: - nested_repeated_schema.nested_schema(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py index f9628da7d..8f7f05c73 100644 --- a/samples/snippets/quickstart.py +++ b/samples/snippets/quickstart.py @@ -18,7 +18,6 @@ def run_quickstart(override_values: Optional[Dict[str, str]] = None) -> None: - if override_values is None: override_values = {} diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index 610c63c3b..88a24618d 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -18,7 +18,7 @@ from google.cloud import bigquery import pytest -import quickstart +import quickstart # type: ignore # Must match the dataset listed in quickstart.py (there's no easy way to # extract this). 
@@ -43,7 +43,6 @@ def test_quickstart( client: bigquery.Client, datasets_to_delete: List[str], ) -> None: - override_values = { "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), } diff --git a/samples/snippets/relax_column.py b/samples/snippets/relax_column.py new file mode 100644 index 000000000..bcd79cee8 --- /dev/null +++ b/samples/snippets/relax_column.py @@ -0,0 +1,52 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + + +def relax_column(table_id: str) -> bigquery.Table: + orig_table_id = table_id + + # [START bigquery_relax_column] + from google.cloud import bigquery + + client = bigquery.Client() + + # TODO(dev): Change table_id to full name of the table you want to create. + table_id = "your-project.your_dataset.your_table" + + # [END bigquery_relax_column] + table_id = orig_table_id + + # [START bigquery_relax_column] + table = client.get_table(table_id) + new_schema = [] + for field in table.schema: + if field.mode != "REQUIRED": + new_schema.append(field) + else: + # SchemaField properties cannot be edited after initialization. + # To make changes, construct new SchemaField objects. + new_field = field.to_api_repr() + new_field["mode"] = "NULLABLE" + relaxed_field = bigquery.SchemaField.from_api_repr(new_field) + new_schema.append(relaxed_field) + + table.schema = new_schema + table = client.update_table(table, ["schema"]) + + print(f"Updated {table_id} schema: {table.schema}.") + + # [END bigquery_relax_column] + return table diff --git a/samples/snippets/relax_column_test.py b/samples/snippets/relax_column_test.py new file mode 100644 index 000000000..ede1c3ab7 --- /dev/null +++ b/samples/snippets/relax_column_test.py @@ -0,0 +1,46 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +from google.cloud import bigquery + +import relax_column # type: ignore + +if typing.TYPE_CHECKING: + import pytest + + +def test_relax_column( + capsys: "pytest.CaptureFixture[str]", + bigquery_client: bigquery.Client, + random_table_id: str, +) -> None: + table = bigquery.Table( + random_table_id, + schema=[ + bigquery.SchemaField("string_col", "STRING", mode="NULLABLE"), + bigquery.SchemaField("string_col2", "STRING", mode="REQUIRED"), + ], + ) + + bigquery_client.create_table(table) + table = relax_column.relax_column(random_table_id) + + out, _ = capsys.readouterr() + + assert all(field.mode == "NULLABLE" for field in table.schema) + assert "REQUIRED" not in out + assert "NULLABLE" in out + assert random_table_id in out diff --git a/samples/snippets/simple_app_test.py b/samples/snippets/simple_app_test.py index de4e1ce34..4bf0bb49c 100644 --- a/samples/snippets/simple_app_test.py +++ b/samples/snippets/simple_app_test.py @@ -14,7 +14,7 @@ import typing -import simple_app +import simple_app # type: ignore if typing.TYPE_CHECKING: import pytest diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py index ef5ec196a..d03114a36 100644 --- a/samples/snippets/test_update_with_dml.py +++ b/samples/snippets/test_update_with_dml.py @@ -17,8 +17,8 @@ from google.cloud import bigquery import pytest -from conftest import prefixer -import update_with_dml +from conftest import prefixer # type: ignore +import update_with_dml # type: ignore @pytest.fixture diff --git a/samples/snippets/update_table_expiration_test.py b/samples/snippets/update_table_expiration_test.py index 721bf53aa..ed68a8c2c 100644 --- a/samples/snippets/update_table_expiration_test.py +++ b/samples/snippets/update_table_expiration_test.py @@ -15,7 +15,7 @@ import datetime import typing -import update_table_expiration +import update_table_expiration # type: ignore if typing.TYPE_CHECKING: import pathlib @@ -28,7 +28,6 @@ def test_update_table_expiration( table_id: str, tmp_path: "pathlib.Path", ) -> None: - # This was not needed for function, only for test expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( days=5 diff --git a/samples/snippets/user_credentials_test.py b/samples/snippets/user_credentials_test.py index df8a6354d..8448187de 100644 --- a/samples/snippets/user_credentials_test.py +++ b/samples/snippets/user_credentials_test.py @@ -19,7 +19,7 @@ import mock import pytest -from user_credentials import main +from user_credentials import main # type: ignore PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] diff --git a/samples/snippets/view.py b/samples/snippets/view.py index 5e976f68a..94f406890 100644 --- a/samples/snippets/view.py +++ b/samples/snippets/view.py @@ -127,7 +127,6 @@ def update_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.T def grant_access( override_values: Optional[OverridesDict] = None, ) -> Tuple["bigquery.Dataset", "bigquery.Dataset"]: - if override_values is None: override_values = {} diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py index 4d0d43b77..1e615db47 100644 --- a/samples/snippets/view_test.py +++ b/samples/snippets/view_test.py @@ -19,7 +19,7 @@ from google.cloud import bigquery import pytest -import view +import view # type: ignore def temp_suffix() -> str: diff --git a/samples/table_exists.py b/samples/table_exists.py index 6edba9239..c19d419ae 100644 --- a/samples/table_exists.py +++ b/samples/table_exists.py @@ -14,7 +14,6 @@ def table_exists(table_id: str) 
-> None: - # [START bigquery_table_exists] from google.cloud import bigquery from google.cloud.exceptions import NotFound diff --git a/samples/table_insert_rows.py b/samples/table_insert_rows.py index 8aa723fe0..d680b4c1e 100644 --- a/samples/table_insert_rows.py +++ b/samples/table_insert_rows.py @@ -14,7 +14,6 @@ def table_insert_rows(table_id: str) -> None: - # [START bigquery_table_insert_rows] from google.cloud import bigquery diff --git a/samples/table_insert_rows_explicit_none_insert_ids.py b/samples/table_insert_rows_explicit_none_insert_ids.py index b2bd06372..bbde034f7 100644 --- a/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/samples/table_insert_rows_explicit_none_insert_ids.py @@ -14,7 +14,6 @@ def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None: - # [START bigquery_table_insert_rows_explicit_none_insert_ids] from google.cloud import bigquery diff --git a/samples/tests/test_add_empty_column.py b/samples/tests/test_add_empty_column.py index 5c7184766..95d554621 100644 --- a/samples/tests/test_add_empty_column.py +++ b/samples/tests/test_add_empty_column.py @@ -21,7 +21,6 @@ def test_add_empty_column(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: - add_empty_column.add_empty_column(table_id) out, err = capsys.readouterr() assert "A new column has been added." in out diff --git a/samples/tests/test_browse_table_data.py b/samples/tests/test_browse_table_data.py index 368e5cad6..670eb7ccf 100644 --- a/samples/tests/test_browse_table_data.py +++ b/samples/tests/test_browse_table_data.py @@ -23,7 +23,6 @@ def test_browse_table_data( capsys: "pytest.CaptureFixture[str]", table_with_data_id: str ) -> None: - browse_table_data.browse_table_data(table_with_data_id) out, err = capsys.readouterr() assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out diff --git a/samples/tests/test_client_list_jobs.py b/samples/tests/test_client_list_jobs.py index a2845b7ad..6bb1bbd19 100644 --- a/samples/tests/test_client_list_jobs.py +++ b/samples/tests/test_client_list_jobs.py @@ -25,7 +25,6 @@ def test_client_list_jobs( capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" ) -> None: - job = create_job.create_job() client.cancel_job(job.job_id) job.cancel() diff --git a/samples/tests/test_client_load_partitioned_table.py b/samples/tests/test_client_load_partitioned_table.py index 24f86c700..2f6564afa 100644 --- a/samples/tests/test_client_load_partitioned_table.py +++ b/samples/tests/test_client_load_partitioned_table.py @@ -23,7 +23,6 @@ def test_client_load_partitioned_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_load_partitioned_table.client_load_partitioned_table(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows to table {}".format(random_table_id) in out diff --git a/samples/tests/test_client_query.py b/samples/tests/test_client_query.py index a8e3c343e..5d4fb9c94 100644 --- a/samples/tests/test_client_query.py +++ b/samples/tests/test_client_query.py @@ -21,7 +21,6 @@ def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: - client_query.client_query() out, err = capsys.readouterr() assert "The query data:" in out diff --git a/samples/tests/test_client_query_add_column.py b/samples/tests/test_client_query_add_column.py index 1eb5a1ed6..c80f195a5 100644 --- a/samples/tests/test_client_query_add_column.py +++ b/samples/tests/test_client_query_add_column.py @@ -25,7 +25,6 @@ def test_client_query_add_column( capsys: 
"pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_client_query_batch.py b/samples/tests/test_client_query_batch.py index 548fe3ac3..b1e0e2647 100644 --- a/samples/tests/test_client_query_batch.py +++ b/samples/tests/test_client_query_batch.py @@ -21,7 +21,6 @@ def test_client_query_batch(capsys: "pytest.CaptureFixture[str]") -> None: - job = client_query_batch.client_query_batch() out, err = capsys.readouterr() assert "Job {} is currently in state {}".format(job.job_id, job.state) in out diff --git a/samples/tests/test_client_query_destination_table.py b/samples/tests/test_client_query_destination_table.py index 067bc16ec..1487f6e65 100644 --- a/samples/tests/test_client_query_destination_table.py +++ b/samples/tests/test_client_query_destination_table.py @@ -23,7 +23,6 @@ def test_client_query_destination_table( capsys: "pytest.CaptureFixture[str]", table_id: str ) -> None: - client_query_destination_table.client_query_destination_table(table_id) out, err = capsys.readouterr() assert "Query results loaded to the table {}".format(table_id) in out diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py index 02b131531..8a1e5bcd4 100644 --- a/samples/tests/test_client_query_destination_table_clustered.py +++ b/samples/tests/test_client_query_destination_table_clustered.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_clustered( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_query_destination_table_clustered.client_query_destination_table_clustered( random_table_id ) diff --git a/samples/tests/test_client_query_destination_table_cmek.py b/samples/tests/test_client_query_destination_table_cmek.py index f2fe3bc39..4cb76be8e 100644 --- a/samples/tests/test_client_query_destination_table_cmek.py +++ b/samples/tests/test_client_query_destination_table_cmek.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_cmek( capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str ) -> None: - client_query_destination_table_cmek.client_query_destination_table_cmek( random_table_id, kms_key_name ) diff --git a/samples/tests/test_client_query_destination_table_legacy.py b/samples/tests/test_client_query_destination_table_legacy.py index 0071ee4a4..78a199bea 100644 --- a/samples/tests/test_client_query_destination_table_legacy.py +++ b/samples/tests/test_client_query_destination_table_legacy.py @@ -23,7 +23,6 @@ def test_client_query_destination_table_legacy( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - client_query_destination_table_legacy.client_query_destination_table_legacy( random_table_id ) diff --git a/samples/tests/test_client_query_dry_run.py b/samples/tests/test_client_query_dry_run.py index cffb152ef..cfc8100a1 100644 --- a/samples/tests/test_client_query_dry_run.py +++ b/samples/tests/test_client_query_dry_run.py @@ -21,7 +21,6 @@ def test_client_query_dry_run(capsys: "pytest.CaptureFixture[str]") -> None: - query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() assert "This query will process" in out diff --git a/samples/tests/test_client_query_legacy_sql.py b/samples/tests/test_client_query_legacy_sql.py index b12b5a934..98303cde9 100644 --- 
a/samples/tests/test_client_query_legacy_sql.py +++ b/samples/tests/test_client_query_legacy_sql.py @@ -22,7 +22,6 @@ def test_client_query_legacy_sql(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/samples/tests/test_client_query_relax_column.py b/samples/tests/test_client_query_relax_column.py index 93fa0f3cf..0df8463be 100644 --- a/samples/tests/test_client_query_relax_column.py +++ b/samples/tests/test_client_query_relax_column.py @@ -27,7 +27,6 @@ def test_client_query_relax_column( random_table_id: str, client: bigquery.Client, ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_client_query_w_array_params.py b/samples/tests/test_client_query_w_array_params.py index fcd3f6972..c302712fe 100644 --- a/samples/tests/test_client_query_w_array_params.py +++ b/samples/tests/test_client_query_w_array_params.py @@ -21,7 +21,6 @@ def test_client_query_w_array_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() assert "James" in out diff --git a/samples/tests/test_client_query_w_named_params.py b/samples/tests/test_client_query_w_named_params.py index 85ef1dc4a..e4d66be41 100644 --- a/samples/tests/test_client_query_w_named_params.py +++ b/samples/tests/test_client_query_w_named_params.py @@ -21,7 +21,6 @@ def test_client_query_w_named_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() assert "the" in out diff --git a/samples/tests/test_client_query_w_positional_params.py b/samples/tests/test_client_query_w_positional_params.py index 8ade676ab..61df76aaa 100644 --- a/samples/tests/test_client_query_w_positional_params.py +++ b/samples/tests/test_client_query_w_positional_params.py @@ -21,7 +21,6 @@ def test_client_query_w_positional_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() assert "the" in out diff --git a/samples/tests/test_client_query_w_struct_params.py b/samples/tests/test_client_query_w_struct_params.py index 3198dbad5..5eea993ce 100644 --- a/samples/tests/test_client_query_w_struct_params.py +++ b/samples/tests/test_client_query_w_struct_params.py @@ -21,7 +21,6 @@ def test_client_query_w_struct_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() assert "1" in out diff --git a/samples/tests/test_client_query_w_timestamp_params.py b/samples/tests/test_client_query_w_timestamp_params.py index a3bbccdd4..8147d4a96 100644 --- a/samples/tests/test_client_query_w_timestamp_params.py +++ b/samples/tests/test_client_query_w_timestamp_params.py @@ -21,7 +21,6 @@ def test_client_query_w_timestamp_params(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() assert "2016, 12, 7, 9, 0" in out diff --git a/samples/tests/test_copy_table_multiple_source.py b/samples/tests/test_copy_table_multiple_source.py index e8b27d2a9..5d7991c91 100644 --- a/samples/tests/test_copy_table_multiple_source.py +++ b/samples/tests/test_copy_table_multiple_source.py @@ -29,7 +29,6 @@ 
def test_copy_table_multiple_source( random_dataset_id: str, client: bigquery.Client, ) -> None: - dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" dataset = client.create_dataset(dataset) diff --git a/samples/tests/test_create_dataset.py b/samples/tests/test_create_dataset.py index e7a897f8f..ecf5ef129 100644 --- a/samples/tests/test_create_dataset.py +++ b/samples/tests/test_create_dataset.py @@ -23,7 +23,6 @@ def test_create_dataset( capsys: "pytest.CaptureFixture[str]", random_dataset_id: str ) -> None: - create_dataset.create_dataset(random_dataset_id) out, err = capsys.readouterr() assert "Created dataset {}".format(random_dataset_id) in out diff --git a/samples/tests/test_dataset_exists.py b/samples/tests/test_dataset_exists.py index bfef4368f..744122e37 100644 --- a/samples/tests/test_dataset_exists.py +++ b/samples/tests/test_dataset_exists.py @@ -27,7 +27,6 @@ def test_dataset_exists( random_dataset_id: str, client: bigquery.Client, ) -> None: - dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() assert "Dataset {} is not found".format(random_dataset_id) in out diff --git a/samples/tests/test_dataset_label_samples.py b/samples/tests/test_dataset_label_samples.py index 75a024856..ec9ff9228 100644 --- a/samples/tests/test_dataset_label_samples.py +++ b/samples/tests/test_dataset_label_samples.py @@ -25,7 +25,6 @@ def test_dataset_label_samples( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - label_dataset.label_dataset(dataset_id) out, err = capsys.readouterr() assert "Labels added to {}".format(dataset_id) in out diff --git a/samples/tests/test_delete_dataset.py b/samples/tests/test_delete_dataset.py index 9347bf185..c2a77c475 100644 --- a/samples/tests/test_delete_dataset.py +++ b/samples/tests/test_delete_dataset.py @@ -21,7 +21,6 @@ def test_delete_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: - delete_dataset.delete_dataset(dataset_id) out, err = capsys.readouterr() assert "Deleted dataset '{}'.".format(dataset_id) in out diff --git a/samples/tests/test_delete_table.py b/samples/tests/test_delete_table.py index aca2df62f..5ba5622e8 100644 --- a/samples/tests/test_delete_table.py +++ b/samples/tests/test_delete_table.py @@ -21,7 +21,6 @@ def test_delete_table(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: - delete_table.delete_table(table_id) out, err = capsys.readouterr() assert "Deleted table '{}'.".format(table_id) in out diff --git a/samples/tests/test_get_dataset.py b/samples/tests/test_get_dataset.py index 97b30541b..07c7a28b7 100644 --- a/samples/tests/test_get_dataset.py +++ b/samples/tests/test_get_dataset.py @@ -21,7 +21,6 @@ def test_get_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: - get_dataset.get_dataset(dataset_id) out, err = capsys.readouterr() assert dataset_id in out diff --git a/samples/tests/test_get_table.py b/samples/tests/test_get_table.py index e6383010f..edf09762d 100644 --- a/samples/tests/test_get_table.py +++ b/samples/tests/test_get_table.py @@ -25,7 +25,6 @@ def test_get_table( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_list_tables.py b/samples/tests/test_list_tables.py index 7c726accc..c8a66b656 100644 --- a/samples/tests/test_list_tables.py +++ b/samples/tests/test_list_tables.py @@ -23,7 +23,6 @@ def 
test_list_tables( capsys: "pytest.CaptureFixture[str]", dataset_id: str, table_id: str ) -> None: - list_tables.list_tables(dataset_id) out, err = capsys.readouterr() assert "Tables contained in '{}':".format(dataset_id) in out diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py index bbf3c671f..89059271a 100644 --- a/samples/tests/test_load_table_clustered.py +++ b/samples/tests/test_load_table_clustered.py @@ -26,7 +26,6 @@ def test_load_table_clustered( random_table_id: str, client: "bigquery.Client", ) -> None: - table = load_table_clustered.load_table_clustered(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_dataframe.py b/samples/tests/test_load_table_dataframe.py index 9a975493c..4aa872fa4 100644 --- a/samples/tests/test_load_table_dataframe.py +++ b/samples/tests/test_load_table_dataframe.py @@ -31,7 +31,6 @@ def test_load_table_dataframe( client: "bigquery.Client", random_table_id: str, ) -> None: - table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() expected_column_names = [ diff --git a/samples/tests/test_load_table_uri_autodetect_csv.py b/samples/tests/test_load_table_uri_autodetect_csv.py index c9b410850..46b593713 100644 --- a/samples/tests/test_load_table_uri_autodetect_csv.py +++ b/samples/tests/test_load_table_uri_autodetect_csv.py @@ -23,7 +23,6 @@ def test_load_table_uri_autodetect_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_autodetect_json.py b/samples/tests/test_load_table_uri_autodetect_json.py index 2c68a13db..43bf4e1b3 100644 --- a/samples/tests/test_load_table_uri_autodetect_json.py +++ b/samples/tests/test_load_table_uri_autodetect_json.py @@ -23,7 +23,6 @@ def test_load_table_uri_autodetect_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) out, err = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_cmek.py b/samples/tests/test_load_table_uri_cmek.py index 1eb873843..1ae8689f9 100644 --- a/samples/tests/test_load_table_uri_cmek.py +++ b/samples/tests/test_load_table_uri_cmek.py @@ -23,7 +23,6 @@ def test_load_table_uri_cmek( capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str ) -> None: - load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) out, _ = capsys.readouterr() assert "A table loaded with encryption configuration key" in out diff --git a/samples/tests/test_load_table_uri_csv.py b/samples/tests/test_load_table_uri_csv.py index a57224c84..8b4c733e8 100644 --- a/samples/tests/test_load_table_uri_csv.py +++ b/samples/tests/test_load_table_uri_csv.py @@ -23,7 +23,6 @@ def test_load_table_uri_csv( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_csv.load_table_uri_csv(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." 
in out diff --git a/samples/tests/test_load_table_uri_json.py b/samples/tests/test_load_table_uri_json.py index 3ad0ce29b..751c3867a 100644 --- a/samples/tests/test_load_table_uri_json.py +++ b/samples/tests/test_load_table_uri_json.py @@ -23,7 +23,6 @@ def test_load_table_uri_json( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_json.load_table_uri_json(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_orc.py b/samples/tests/test_load_table_uri_orc.py index f31e8cabb..23d8288b7 100644 --- a/samples/tests/test_load_table_uri_orc.py +++ b/samples/tests/test_load_table_uri_orc.py @@ -23,7 +23,6 @@ def test_load_table_uri_orc( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_orc.load_table_uri_orc(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_parquet.py b/samples/tests/test_load_table_uri_parquet.py index 5404e8584..ee7682388 100644 --- a/samples/tests/test_load_table_uri_parquet.py +++ b/samples/tests/test_load_table_uri_parquet.py @@ -23,7 +23,6 @@ def test_load_table_uri_json( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - load_table_uri_parquet.load_table_uri_parquet(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_query_external_gcs_temporary_table.py b/samples/tests/test_query_external_gcs_temporary_table.py index 9590f3d7a..75b3ce6d8 100644 --- a/samples/tests/test_query_external_gcs_temporary_table.py +++ b/samples/tests/test_query_external_gcs_temporary_table.py @@ -23,7 +23,6 @@ def test_query_external_gcs_temporary_table( capsys: "pytest.CaptureFixture[str]", ) -> None: - query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() assert "There are 4 states with names starting with W." in out diff --git a/samples/tests/test_query_external_sheets_permanent_table.py b/samples/tests/test_query_external_sheets_permanent_table.py index 851839054..1a4c21330 100644 --- a/samples/tests/test_query_external_sheets_permanent_table.py +++ b/samples/tests/test_query_external_sheets_permanent_table.py @@ -23,7 +23,6 @@ def test_query_external_sheets_permanent_table( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - query_external_sheets_permanent_table.query_external_sheets_permanent_table( dataset_id ) diff --git a/samples/tests/test_query_external_sheets_temporary_table.py b/samples/tests/test_query_external_sheets_temporary_table.py index 58e0cb394..2ada20566 100644 --- a/samples/tests/test_query_external_sheets_temporary_table.py +++ b/samples/tests/test_query_external_sheets_temporary_table.py @@ -23,7 +23,6 @@ def test_query_external_sheets_temporary_table( capsys: "pytest.CaptureFixture[str]", ) -> None: - query_external_sheets_temporary_table.query_external_sheets_temporary_table() out, err = capsys.readouterr() assert "There are 2 states with names starting with W in the selected range." 
in out diff --git a/samples/tests/test_query_no_cache.py b/samples/tests/test_query_no_cache.py index f3fb039c9..fffa5dac7 100644 --- a/samples/tests/test_query_no_cache.py +++ b/samples/tests/test_query_no_cache.py @@ -22,7 +22,6 @@ def test_query_no_cache(capsys: "pytest.CaptureFixture[str]") -> None: - query_no_cache.query_no_cache() out, err = capsys.readouterr() assert re.search(r"(Row[\w(){}:', ]+)$", out) diff --git a/samples/tests/test_query_pagination.py b/samples/tests/test_query_pagination.py index daf711e49..adc946399 100644 --- a/samples/tests/test_query_pagination.py +++ b/samples/tests/test_query_pagination.py @@ -21,7 +21,6 @@ def test_query_pagination(capsys: "pytest.CaptureFixture[str]") -> None: - query_pagination.query_pagination() out, _ = capsys.readouterr() assert "The query data:" in out diff --git a/samples/tests/test_query_script.py b/samples/tests/test_query_script.py index 98dd1253b..50c973024 100644 --- a/samples/tests/test_query_script.py +++ b/samples/tests/test_query_script.py @@ -21,7 +21,6 @@ def test_query_script(capsys: "pytest.CaptureFixture[str]") -> None: - query_script.query_script() out, _ = capsys.readouterr() assert "Script created 2 child jobs." in out diff --git a/samples/tests/test_query_to_arrow.py b/samples/tests/test_query_to_arrow.py index d9b1aeb73..9fc8571e9 100644 --- a/samples/tests/test_query_to_arrow.py +++ b/samples/tests/test_query_to_arrow.py @@ -20,7 +20,6 @@ def test_query_to_arrow(capsys: "pytest.CaptureFixture[str]") -> None: - arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out diff --git a/samples/tests/test_table_exists.py b/samples/tests/test_table_exists.py index 7317ba747..35cf61cc8 100644 --- a/samples/tests/test_table_exists.py +++ b/samples/tests/test_table_exists.py @@ -25,7 +25,6 @@ def test_table_exists( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - table_exists.table_exists(random_table_id) out, err = capsys.readouterr() assert "Table {} is not found.".format(random_table_id) in out diff --git a/samples/tests/test_table_insert_rows.py b/samples/tests/test_table_insert_rows.py index 59024fa95..13400d69c 100644 --- a/samples/tests/test_table_insert_rows.py +++ b/samples/tests/test_table_insert_rows.py @@ -27,7 +27,6 @@ def test_table_insert_rows( random_table_id: str, client: bigquery.Client, ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index 00456ce84..c6bfbf392 100644 --- a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -25,7 +25,6 @@ def test_table_insert_rows_explicit_none_insert_ids( capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client ) -> None: - schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/samples/tests/test_update_dataset_access.py b/samples/tests/test_update_dataset_access.py index 186a3b575..f17634fb0 100644 --- a/samples/tests/test_update_dataset_access.py +++ b/samples/tests/test_update_dataset_access.py @@ -23,7 +23,6 @@ def test_update_dataset_access( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - 
update_dataset_access.update_dataset_access(dataset_id) out, err = capsys.readouterr() assert ( diff --git a/samples/tests/test_update_dataset_default_partition_expiration.py b/samples/tests/test_update_dataset_default_partition_expiration.py index b7787dde3..4dd0d9296 100644 --- a/samples/tests/test_update_dataset_default_partition_expiration.py +++ b/samples/tests/test_update_dataset_default_partition_expiration.py @@ -23,7 +23,6 @@ def test_update_dataset_default_partition_expiration( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_partition_expiration.update_dataset_default_partition_expiration( diff --git a/samples/tests/test_update_dataset_default_table_expiration.py b/samples/tests/test_update_dataset_default_table_expiration.py index f780827f2..24df5446d 100644 --- a/samples/tests/test_update_dataset_default_table_expiration.py +++ b/samples/tests/test_update_dataset_default_table_expiration.py @@ -23,7 +23,6 @@ def test_update_dataset_default_table_expiration( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds update_dataset_default_table_expiration.update_dataset_default_table_expiration( diff --git a/samples/tests/test_update_dataset_description.py b/samples/tests/test_update_dataset_description.py index 5d1209e22..6d76337dc 100644 --- a/samples/tests/test_update_dataset_description.py +++ b/samples/tests/test_update_dataset_description.py @@ -23,7 +23,6 @@ def test_update_dataset_description( capsys: "pytest.CaptureFixture[str]", dataset_id: str ) -> None: - update_dataset_description.update_dataset_description(dataset_id) out, err = capsys.readouterr() assert "Updated description." in out diff --git a/samples/tests/test_update_table_require_partition_filter.py b/samples/tests/test_update_table_require_partition_filter.py index 68e1c1e2b..c86a22769 100644 --- a/samples/tests/test_update_table_require_partition_filter.py +++ b/samples/tests/test_update_table_require_partition_filter.py @@ -27,7 +27,6 @@ def test_update_table_require_partition_filter( random_table_id: str, client: bigquery.Client, ) -> None: - # Make a partitioned table. 
schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] table = bigquery.Table(random_table_id, schema=schema) diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py index fda784da5..2fb21aff2 100644 --- a/samples/update_dataset_access.py +++ b/samples/update_dataset_access.py @@ -14,7 +14,6 @@ def update_dataset_access(dataset_id: str) -> None: - # [START bigquery_update_dataset_access] from google.cloud import bigquery diff --git a/samples/update_dataset_default_partition_expiration.py b/samples/update_dataset_default_partition_expiration.py index 37456f3a0..7a3ccaca3 100644 --- a/samples/update_dataset_default_partition_expiration.py +++ b/samples/update_dataset_default_partition_expiration.py @@ -14,7 +14,6 @@ def update_dataset_default_partition_expiration(dataset_id: str) -> None: - # [START bigquery_update_dataset_partition_expiration] from google.cloud import bigquery diff --git a/samples/update_dataset_default_table_expiration.py b/samples/update_dataset_default_table_expiration.py index cf6f50d9f..ccd0d979e 100644 --- a/samples/update_dataset_default_table_expiration.py +++ b/samples/update_dataset_default_table_expiration.py @@ -14,7 +14,6 @@ def update_dataset_default_table_expiration(dataset_id: str) -> None: - # [START bigquery_update_dataset_expiration] from google.cloud import bigquery diff --git a/samples/update_dataset_description.py b/samples/update_dataset_description.py index 98c5fed43..b12baa999 100644 --- a/samples/update_dataset_description.py +++ b/samples/update_dataset_description.py @@ -14,7 +14,6 @@ def update_dataset_description(dataset_id: str) -> None: - # [START bigquery_update_dataset_description] from google.cloud import bigquery diff --git a/samples/update_routine.py b/samples/update_routine.py index 1a975a253..1a8908295 100644 --- a/samples/update_routine.py +++ b/samples/update_routine.py @@ -19,7 +19,6 @@ def update_routine(routine_id: str) -> "bigquery.Routine": - # [START bigquery_update_routine] from google.cloud import bigquery diff --git a/samples/update_table_require_partition_filter.py b/samples/update_table_require_partition_filter.py index 8221238a7..40b739b76 100644 --- a/samples/update_table_require_partition_filter.py +++ b/samples/update_table_require_partition_filter.py @@ -14,7 +14,6 @@ def update_table_require_partition_filter(table_id: str) -> None: - # [START bigquery_update_table_require_partition_filter] from google.cloud import bigquery diff --git a/setup.py b/setup.py index 08106f694..4e87b3b84 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ "ipywidgets>=7.7.0", "ipykernel>=6.0.0", ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], "ipython": [ "ipython>=7.23.1,!=8.1.0", "ipykernel>=6.0.0", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 8fd532f4c..09606590e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -265,6 +265,13 @@ def test_get_dataset(self): self.assertEqual(got.friendly_name, "Friendly") self.assertEqual(got.description, "Description") + def test_create_dataset_with_default_rounding_mode(self): + DATASET_ID = _make_dataset_id("create_dataset_rounding_mode") + dataset = self.temp_dataset(DATASET_ID, default_rounding_mode="ROUND_HALF_EVEN") + + self.assertTrue(_dataset_exists(dataset)) + self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def test_update_dataset(self): dataset = 
self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) @@ -2286,12 +2293,15 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def temp_dataset(self, dataset_id, location=None): + def temp_dataset(self, dataset_id, *args, **kwargs): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = Dataset(dataset_ref) - if location: - dataset.location = location + if kwargs.get("location"): + dataset.location = kwargs.get("location") + if kwargs.get("default_rounding_mode"): + dataset.default_rounding_mode = kwargs.get("default_rounding_mode") + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset @@ -2319,7 +2329,6 @@ def _table_exists(t): def test_dbapi_create_view(dataset_id: str): - query = f""" CREATE VIEW {dataset_id}.dbapi_create_view AS SELECT name, SUM(number) AS total diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 7d3186d47..26f1f2a73 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -54,7 +54,6 @@ def _make_resource(self, started=False, ended=False, location="US"): return resource def _verifyBooleanResourceProperties(self, job, config): - if "allowLargeResults" in config: self.assertEqual(job.allow_large_results, config["allowLargeResults"]) else: diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 1ae988414..279a954c7 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -18,7 +18,9 @@ import pytest + import google.cloud._helpers +import google.cloud.bigquery.model KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -136,6 +138,7 @@ def test_from_api_repr(target_class): google.cloud._helpers._rfc3339_to_datetime(got.training_runs[2]["startTime"]) == expiration_time ) + assert got.transform_columns == [] def test_from_api_repr_w_minimal_resource(target_class): @@ -293,6 +296,71 @@ def test_feature_columns(object_under_test): assert object_under_test.feature_columns == expected +def test_from_api_repr_w_transform_columns(target_class): + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "transformColumns": [ + { + "name": "transform_name", + "type": {"typeKind": "INT64"}, + "transformSql": "transform_sql", + } + ], + } + got = target_class.from_api_repr(resource) + assert len(got.transform_columns) == 1 + transform_column = got.transform_columns[0] + assert isinstance(transform_column, google.cloud.bigquery.model.TransformColumn) + assert transform_column.name == "transform_name" + + +def test_transform_column_name(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"name": "is_female"} + ) + assert transform_columns.name == "is_female" + + +def test_transform_column_transform_sql(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"transformSql": "is_female"} + ) + assert transform_columns.transform_sql == "is_female" + + +def test_transform_column_type(): + transform_columns = google.cloud.bigquery.model.TransformColumn( + {"type": {"typeKind": "BOOL"}} + ) + assert transform_columns.type_.type_kind == "BOOL" + + +def test_transform_column_type_none(): + transform_columns = google.cloud.bigquery.model.TransformColumn({}) + assert transform_columns.type_ is None + + +def 
test_transform_column_from_api_repr_with_unknown_properties(): + transform_column = google.cloud.bigquery.model.TransformColumn.from_api_repr( + { + "name": "is_female", + "type": {"typeKind": "BOOL"}, + "transformSql": "is_female", + "test": "one", + } + ) + assert transform_column._properties == { + "name": "is_female", + "type": {"typeKind": "BOOL"}, + "transformSql": "is_female", + "test": "one", + } + + def test_label_columns(object_under_test): from google.cloud.bigquery import standard_sql diff --git a/tests/unit/routine/test_remote_function_options.py b/tests/unit/routine/test_remote_function_options.py index b476dca1e..ffd57e8c1 100644 --- a/tests/unit/routine/test_remote_function_options.py +++ b/tests/unit/routine/test_remote_function_options.py @@ -32,7 +32,6 @@ def target_class(): def test_ctor(target_class): - options = target_class( endpoint=ENDPOINT, connection=CONNECTION, diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 4fb86f665..e2e2da3c8 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,143 +19,6 @@ import mock -try: - from google.cloud import bigquery_storage # type: ignore -except ImportError: # pragma: NO COVER - bigquery_storage = None - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class TestBQStorageVersions(unittest.TestCase): - def tearDown(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - - def _object_under_test(self): - from google.cloud.bigquery import _helpers - - return _helpers.BQStorageVersions() - - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): - self._call_fut() - - def test_installed_version_returns_cached(self): - versions = self._object_under_test() - versions._installed_version = object() - assert versions.installed_version is versions._installed_version - - def test_installed_version_returns_parsed_version(self): - versions = self._object_under_test() - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): - version = versions.installed_version - - assert version.major == 1 - assert version.minor == 2 - assert version.micro == 3 - - def test_is_read_session_optional_true(self): - versions = 
self._object_under_test() - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): - assert versions.is_read_session_optional - - def test_is_read_session_optional_false(self): - versions = self._object_under_test() - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): - assert not versions.is_read_session_optional - - -@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") -class TestPyarrowVersions(unittest.TestCase): - def tearDown(self): - from google.cloud.bigquery import _helpers - - # Reset any cached versions since it may not match reality. - _helpers.PYARROW_VERSIONS._installed_version = None - - def _object_under_test(self): - from google.cloud.bigquery import _helpers - - return _helpers.PyarrowVersions() - - def _call_try_import(self, **kwargs): - from google.cloud.bigquery import _helpers - - _helpers.PYARROW_VERSIONS._installed_version = None - return _helpers.PYARROW_VERSIONS.try_import(**kwargs) - - def test_try_import_raises_no_error_w_recent_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="5.0.0"): - try: - pyarrow = self._call_try_import(raise_if_error=True) - self.assertIsNotNone(pyarrow) - except LegacyPyarrowError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_try_import_returns_none_w_legacy_pyarrow(self): - with mock.patch("pyarrow.__version__", new="2.0.0"): - pyarrow = self._call_try_import() - self.assertIsNone(pyarrow) - - def test_try_import_raises_error_w_legacy_pyarrow(self): - from google.cloud.bigquery.exceptions import LegacyPyarrowError - - with mock.patch("pyarrow.__version__", new="2.0.0"): - with self.assertRaises(LegacyPyarrowError): - self._call_try_import(raise_if_error=True) - - def test_installed_version_returns_cached(self): - versions = self._object_under_test() - versions._installed_version = object() - assert versions.installed_version is versions._installed_version - - def test_installed_version_returns_parsed_version(self): - versions = self._object_under_test() - - with mock.patch("pyarrow.__version__", new="1.2.3"): - version = versions.installed_version - - assert version.major == 1 - assert version.minor == 2 - assert version.micro == 3 - class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index a4cc1fefb..212a6f1dd 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -40,11 +40,12 @@ from google import api_core from google.cloud.bigquery import exceptions -from google.cloud.bigquery import _helpers +from google.cloud.bigquery import _pyarrow_helpers +from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT -pyarrow = _helpers.PYARROW_VERSIONS.try_import() +pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() if pyarrow: import pyarrow.parquet @@ -54,12 +55,7 @@ # used in test parameterization. 
pyarrow = mock.Mock() -try: - from google.cloud import bigquery_storage - - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None +bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import() PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -346,14 +342,14 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field04", pyarrow.int64()), pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()), + pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()), pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()), + pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) expected = pyarrow.struct(expected) @@ -394,14 +390,14 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field04", pyarrow.int64()), pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()), + pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), - pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()), pyarrow.field("field12", pyarrow.date32()), - pyarrow.field("field13", module_under_test.pyarrow_time()), - pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()), + pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) expected_value_type = pyarrow.struct(expected) @@ -1117,7 +1113,9 @@ def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( "pyarrow not installed" ) - monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + monkeypatch.setattr( + _versions_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import + ) with pytest.raises(exceptions.LegacyPyarrowError): module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) @@ -1612,7 +1610,9 @@ def test__download_table_bqstorage_stream_includes_read_session( import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.types - monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") bqstorage_client = mock.create_autospec( bigquery_storage.BigQueryReadClient, instance=True @@ -1637,7 +1637,7 @@ def 
test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + or not _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1646,7 +1646,9 @@ def test__download_table_bqstorage_stream_omits_read_session( import google.cloud.bigquery_storage_v1.reader import google.cloud.bigquery_storage_v1.types - monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") bqstorage_client = mock.create_autospec( bigquery_storage.BigQueryReadClient, instance=True diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py new file mode 100644 index 000000000..f0a872c88 --- /dev/null +++ b/tests/unit/test__pyarrow_helpers.py @@ -0,0 +1,38 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + + +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") + + +@pytest.fixture +def module_under_test(): + from google.cloud.bigquery import _pyarrow_helpers + + return _pyarrow_helpers + + +def test_bq_to_arrow_scalars(module_under_test): + assert ( + module_under_test.bq_to_arrow_scalars("BIGNUMERIC") + == module_under_test.pyarrow_bignumeric + ) + assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None + + +def test_arrow_scalar_ids_to_bq(module_under_test): + assert module_under_test.arrow_scalar_ids_to_bq(pyarrow.bool_().id) == "BOOL" + assert module_under_test.arrow_scalar_ids_to_bq("UNKNOWN_TYPE") is None diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py new file mode 100644 index 000000000..144f14b7c --- /dev/null +++ b/tests/unit/test__versions_helpers.py @@ -0,0 +1,175 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import mock + +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None + +try: + from google.cloud import bigquery_storage # type: ignore +except ImportError: # pragma: NO COVER + bigquery_storage = None + +from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import exceptions + + +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_try_import_raises_no_error_w_recent_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = versions.try_import(raise_if_error=True) + assert pyarrow is not None + except exceptions.LegacyPyarrowError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_try_import_returns_none_w_legacy_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = versions.try_import() + assert pyarrow is None + + +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_try_import_raises_error_w_legacy_pyarrow(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="2.0.0"): + with pytest.raises(exceptions.LegacyPyarrowError): + versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_installed_pyarrow_version_returns_cached(): + versions = _versions_helpers.PyarrowVersions() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + +@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed") +def test_installed_pyarrow_version_returns_parsed_version(): + versions = _versions_helpers.PyarrowVersions() + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_no_error_w_recent_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("Legacy error raised with a non-legacy dependency version.") + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_error_w_legacy_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with pytest.raises(exceptions.LegacyBigQueryStorageError): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_returns_none_with_legacy_bqstorage(): + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bq_storage = bqstorage_versions.try_import() + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("Legacy error raised when raise_if_error == False.") + assert bq_storage is None + + +@pytest.mark.skipif( + bigquery_storage is not 
None, + reason="Tests behavior when `google-cloud-bigquery-storage` isn't installed", +) +def test_returns_none_with_bqstorage_uninstalled(): + try: + bqstorage_versions = _versions_helpers.BQStorageVersions() + bq_storage = bqstorage_versions.try_import() + except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER + raise ("NotFound error raised when raise_if_error == False.") + assert bq_storage is None + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_raises_error_w_unknown_bqstorage_version(): + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: 0.0.0" + with pytest.raises(exceptions.LegacyBigQueryStorageError, match=error_pattern): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions.try_import(raise_if_error=True) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_installed_bqstorage_version_returns_cached(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + bqstorage_versions._installed_version = object() + assert bqstorage_versions.installed_version is bqstorage_versions._installed_version + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_installed_bqstorage_version_returns_parsed_version(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + bqstorage_versions = bqstorage_versions.installed_version + + assert bqstorage_versions.major == 1 + assert bqstorage_versions.minor == 2 + assert bqstorage_versions.micro == 3 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bqstorage_is_read_session_optional_true(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert bqstorage_versions.is_read_session_optional + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_bqstorage_is_read_session_optional_false(): + bqstorage_versions = _versions_helpers.BQStorageVersions() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not bqstorage_versions.is_read_session_optional diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index faa073dce..d470bd9fd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,8 +27,8 @@ import warnings import mock -import packaging import requests +import packaging import pytest import pkg_resources @@ -65,6 +65,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import exceptions from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions @@ -111,7 +112,6 @@ def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): class TestClient(unittest.TestCase): - PROJECT = "PROJECT" DS_ID = "DATASET_ID" TABLE_ID = "TABLE_ID" @@ -170,7 +170,6 @@ def test_ctor_w_empty_client_options(self): ) def test_ctor_w_client_options_dict(self): - creds = _make_credentials() http = object() client_options = {"api_endpoint": "https://www.foo-googleapis.com"} @@ -823,14 +822,12 @@ def 
fail_bqstorage_import(name, globals, locals, fromlist, level): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import", + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: bqstorage_client = client._ensure_bqstorage_client() @@ -859,15 +856,13 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import", + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) @@ -8617,7 +8612,7 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): dataframe = pandas.DataFrame(records) pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._PYARROW_VERSION", + "google.cloud.bigquery._versions_helpers.PYARROW_VERSIONS._installed_version", packaging.version.parse("2.0.0"), # A known bad version of pyarrow. 
) get_table_patch = mock.patch( @@ -8630,22 +8625,13 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): ) with load_patch, get_table_patch, pyarrow_version_patch: - with warnings.catch_warnings(record=True) as warned: + with pytest.raises(exceptions.LegacyPyarrowError): client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION, ) - expected_warnings = [ - warning for warning in warned if "pyarrow" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - assert issubclass(expected_warnings[0].category, RuntimeWarning) - msg = str(expected_warnings[0].message) - assert "pyarrow 2.0.0" in msg - assert "data corruption" in msg - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index 81af52261..3b2e644d9 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -63,6 +63,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "datasetId": "starry-skies", "tableId": "northern-hemisphere", } + DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -73,6 +74,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "defaultTableExpirationMs": "3600", "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "defaultRoundingMode": DEFAULT_ROUNDING_MODE, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -88,8 +90,8 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) - assert after.dataset_id == DS_ID assert after.project == PROJECT assert after.etag == RESOURCE["etag"] @@ -99,6 +101,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.location == LOCATION assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS + assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE conn.api_request.assert_called_once_with( method="POST", @@ -109,6 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "friendlyName": FRIENDLY_NAME, "location": LOCATION, "defaultTableExpirationMs": "3600", + "defaultRoundingMode": DEFAULT_ROUNDING_MODE, "access": [ {"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW, "role": None}, @@ -365,3 +369,100 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) + + +def test_create_dataset_with_default_rounding_mode_if_value_is_none( + PROJECT, DS_ID, LOCATION +): + default_rounding_mode = None + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.default_rounding_mode = default_rounding_mode + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.default_rounding_mode is None + + 
+    conn.api_request.assert_called_once_with(
+        method="POST",
+        path=path,
+        data={
+            "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+            "labels": {},
+            "location": LOCATION,
+            "defaultRoundingMode": "ROUNDING_MODE_UNSPECIFIED",
+        },
+        timeout=DEFAULT_TIMEOUT,
+    )
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_not_string(
+    PROJECT, DS_ID, LOCATION
+):
+    default_rounding_mode = 10
+    ds_ref = DatasetReference(PROJECT, DS_ID)
+    dataset = Dataset(ds_ref)
+    with pytest.raises(ValueError) as e:
+        dataset.default_rounding_mode = default_rounding_mode
+    assert str(e.value) == "Pass a string, or None"
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_not_in_possible_values(
+    PROJECT, DS_ID
+):
+    default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZEROS"
+    ds_ref = DatasetReference(PROJECT, DS_ID)
+    dataset = Dataset(ds_ref)
+    with pytest.raises(ValueError) as e:
+        dataset.default_rounding_mode = default_rounding_mode
+    assert (
+        str(e.value)
+        == "rounding mode needs to be one of ROUNDING_MODE_UNSPECIFIED,ROUND_HALF_AWAY_FROM_ZERO,ROUND_HALF_EVEN"
+    )
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_in_possible_values(
+    PROJECT, DS_ID, LOCATION
+):
+    default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZERO"
+    path = "/projects/%s/datasets" % PROJECT
+    resource = {
+        "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+        "etag": "etag",
+        "id": "{}:{}".format(PROJECT, DS_ID),
+        "location": LOCATION,
+    }
+    client = make_client(location=LOCATION)
+    conn = client._connection = make_connection(resource)
+
+    ds_ref = DatasetReference(PROJECT, DS_ID)
+    before = Dataset(ds_ref)
+    before.default_rounding_mode = default_rounding_mode
+    after = client.create_dataset(before)
+
+    assert after.dataset_id == DS_ID
+    assert after.project == PROJECT
+    assert after.default_rounding_mode is None
+
+    conn.api_request.assert_called_once_with(
+        method="POST",
+        path=path,
+        data={
+            "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+            "labels": {},
+            "location": LOCATION,
+            "defaultRoundingMode": default_rounding_mode,
+        },
+        timeout=DEFAULT_TIMEOUT,
+    )
diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py
index 3b1452805..0a709ab43 100644
--- a/tests/unit/test_dataset.py
+++ b/tests/unit/test_dataset.py
@@ -152,6 +152,22 @@ def test_from_api_repr_w_unknown_entity_type(self):
         exp_resource = entry.to_api_repr()
         self.assertEqual(resource, exp_resource)

+    def test_from_api_repr_wo_role(self):
+        resource = {
+            "view": {
+                "projectId": "my-project",
+                "datasetId": "my_dataset",
+                "tableId": "my_table",
+            }
+        }
+        entry = self._get_target_class().from_api_repr(resource)
+        exp_entry = self._make_one(
+            role=None,
+            entity_type="view",
+            entity_id=resource["view"],
+        )
+        self.assertEqual(entry, exp_entry)
+
     def test_to_api_repr_w_extra_properties(self):
         resource = {
             "role": "READER",
@@ -693,7 +709,6 @@ def _verify_access_entry(self, access_entries, resource):
             self.assertEqual(a_entry.entity_id, r_entry["entity_id"])

     def _verify_readonly_resource_properties(self, dataset, resource):
-
         self.assertEqual(dataset.project, self.PROJECT)
         self.assertEqual(dataset.dataset_id, self.DS_ID)
         self.assertEqual(dataset.reference.project, self.PROJECT)
@@ -717,7 +732,6 @@ def _verify_readonly_resource_properties(self, dataset, resource):
         self.assertIsNone(dataset.self_link)

     def _verify_resource_properties(self, dataset, resource):
-
         self._verify_readonly_resource_properties(dataset, resource)

         if "defaultTableExpirationMs" in resource:
diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py
index fae0c17e9..542f923d2 100644
--- a/tests/unit/test_dbapi__helpers.py
+++ b/tests/unit/test_dbapi__helpers.py
@@ -255,7 +255,6 @@ def test_non_empty_iterable(self):
 class TestRaiseOnClosedDecorator(unittest.TestCase):
     def _make_class(self):
         class Foo(object):
-
             class_member = "class member"

             def __init__(self):
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py
index 67fd13fa7..9fd16e699 100644
--- a/tests/unit/test_external_config.py
+++ b/tests/unit/test_external_config.py
@@ -21,7 +21,6 @@


 class TestExternalConfig(unittest.TestCase):
-
     SOURCE_URIS = ["gs://foo", "gs://bar"]

     BASE_RESOURCE = {
diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py
index c0aa5d85e..b03894095 100644
--- a/tests/unit/test_magics.py
+++ b/tests/unit/test_magics.py
@@ -25,6 +25,7 @@
 from test_utils.imports import maybe_fail_import

 from google.cloud import bigquery
+from google.cloud.bigquery import exceptions as bq_exceptions
 from google.cloud.bigquery import job
 from google.cloud.bigquery import table
 from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
@@ -338,6 +339,9 @@ def test__make_bqstorage_client_true():


 def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage):
+    """When package `google-cloud-bigquery-storage` is not installed, reports
+    ImportError.
+    """
     credentials_mock = mock.create_autospec(
         google.auth.credentials.Credentials, instance=True
     )
@@ -357,8 +361,9 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage):
     bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
 )
 def test__make_bqstorage_client_true_obsolete_dependency():
-    from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
-
+    """When package `google-cloud-bigquery-storage` is installed but has outdated
+    version, returns None, and raises a warning.
+    """
     credentials_mock = mock.create_autospec(
         google.auth.credentials.Credentials, instance=True
     )
@@ -367,8 +372,10 @@ def test__make_bqstorage_client_true_obsolete_dependency():
     )

     patcher = mock.patch(
-        "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version",
-        side_effect=LegacyBigQueryStorageError("BQ Storage too old"),
+        "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import",
+        side_effect=bq_exceptions.LegacyBigQueryStorageError(
+            "google-cloud-bigquery-storage is outdated"
+        ),
     )
     with patcher, warnings.catch_warnings(record=True) as warned:
         got = magics._make_bqstorage_client(test_client, True, {})
@@ -376,7 +383,9 @@ def test__make_bqstorage_client_true_obsolete_dependency():

     assert got is None

     matching_warnings = [
-        warning for warning in warned if "BQ Storage too old" in str(warning)
+        warning
+        for warning in warned
+        if "google-cloud-bigquery-storage is outdated" in str(warning)
     ]
     assert matching_warnings, "Obsolete dependency warning not raised."
@@ -638,9 +647,9 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch):
         google.cloud.bigquery.job.QueryJob, instance=True
     )
     query_job_mock.to_dataframe.return_value = result
-    with run_query_patch as run_query_mock, bqstorage_client_patch, warnings.catch_warnings(
-        record=True
-    ) as warned:
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), warnings.catch_warnings(record=True) as warned:
         run_query_mock.return_value = query_job_mock

         return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql)
@@ -801,7 +810,9 @@ def test_bigquery_magic_w_max_results_query_job_results_fails():

     with pytest.raises(
         OSError
-    ), client_query_patch as client_query_mock, default_patch, close_transports_patch as close_transports:
+    ), client_query_patch as client_query_mock, (
+        default_patch
+    ), close_transports_patch as close_transports:
         client_query_mock.return_value = query_job_mock
         ip.run_cell_magic("bigquery", "--max_results=5", sql)

diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index f31dc5528..fa2f30cea 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -28,6 +28,8 @@
 import google.api_core.exceptions
 from test_utils.imports import maybe_fail_import

+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import exceptions
 from google.cloud.bigquery.table import TableReference
 from google.cloud.bigquery.dataset import DatasetReference

@@ -40,17 +42,12 @@
     bigquery_storage = None
     big_query_read_grpc_transport = None

-from google.cloud.bigquery import _helpers

-pyarrow = _helpers.PYARROW_VERSIONS.try_import()
-PYARROW_VERSION = pkg_resources.parse_version("0.0.1")
+pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()

 if pyarrow:
-    import pyarrow
     import pyarrow.types

-    PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__)
-
 try:
     import pandas
 except (ImportError, AttributeError):  # pragma: NO COVER
@@ -73,8 +70,6 @@ except (ImportError, AttributeError):  # pragma: NO COVER
     tqdm = None


-PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")
-
 if pandas is not None:
     PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
 else:
@@ -416,7 +411,6 @@ def test___str__(self):


 class TestTable(unittest.TestCase, _SchemaBase):
-
     PROJECT = "prahj-ekt"
     DS_ID = "dataset-name"
     TABLE_NAME = "table-name"
@@ -524,7 +518,6 @@ def _verifyReadonlyResourceProperties(self, table, resource):
         )

     def _verifyResourceProperties(self, table, resource):
-
         self._verifyReadonlyResourceProperties(table, resource)

         if "expirationTime" in resource:
@@ -1501,7 +1494,6 @@ def test___str__(self):


 class Test_row_from_mapping(unittest.TestCase, _SchemaBase):
-
     PROJECT = "prahj-ekt"
     DS_ID = "dataset-name"
     TABLE_NAME = "table-name"
@@ -1790,7 +1782,6 @@ def _make_table_list_item(*args, **kwargs):
         return TableListItem(*args, **kwargs)

     def test_table_eq_table_ref(self):
-
         table = self._make_table("project_foo.dataset_bar.table_baz")
         dataset_ref = DatasetReference("project_foo", "dataset_bar")
         table_ref = self._make_table_ref(dataset_ref, "table_baz")
@@ -1814,7 +1805,6 @@ def test_table_eq_table_list_item(self):
         assert table_list_item == table

     def test_table_ref_eq_table_list_item(self):
-
         dataset_ref = DatasetReference("project_foo", "dataset_bar")
         table_ref = self._make_table_ref(dataset_ref, "table_baz")
         table_list_item = self._make_table_list_item(
@@ -2267,13 +2257,11 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level):
         bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
`google-cloud-bigquery-storage`" ) def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + "google.cloud.bigquery.table._versions_helpers.BQ_STORAGE_VERSIONS.try_import", + side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: result = iterator._validate_bqstorage( @@ -2878,11 +2866,11 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): mock_client = _mock_client() row_iterator = self._make_one(mock_client, api_request, path, schema) - def mock_verify_version(): - raise _helpers.LegacyBigQueryStorageError("no bqstorage") + def mock_verify_version(raise_if_error: bool = False): + raise exceptions.LegacyBigQueryStorageError("no bqstorage") with mock.patch( - "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", + "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import", mock_verify_version, ): tbl = row_iterator.to_arrow(create_bqstorage_client=True)