Merged (21 commits; changes from 1 commit shown below)
0343a64
adds to_dataframe() to QueryJob
alixhami Nov 7, 2017
b74e6d4
removes unnecessary system test
alixhami Nov 7, 2017
e89b8de
adds docstring to to_dataframe()
alixhami Nov 7, 2017
8184716
updates to _make_resource() after rebasing for #4355
alixhami Nov 7, 2017
bc20f91
skips to_dataframe() tests if pandas is not installed
alixhami Nov 7, 2017
2b8ca85
imports pandas at module level and raises exception in to_dataframe()…
alixhami Nov 10, 2017
5c52dc6
adds pandas as extra for installation
alixhami Nov 10, 2017
484ab91
updates docstring to google style
alixhami Nov 10, 2017
4db3f4b
adds pandas extra to nox environment
alixhami Nov 10, 2017
a31e79d
adds 'no cover' pragma for pandas import errors
alixhami Nov 10, 2017
03b7fd5
adds test for when pandas is None
alixhami Nov 13, 2017
0c7bf88
fixes lint error
alixhami Nov 13, 2017
84994a7
adds RowIterator class
alixhami Nov 14, 2017
04f76f5
moves to_dataframe() to RowIterator
alixhami Nov 14, 2017
4fd0cc0
adds test for pandas handling of basic BigQuery data types
alixhami Nov 15, 2017
321b56a
moves schema to RowIterator constructor
alixhami Nov 15, 2017
da52040
adds tests for column dtypes
alixhami Nov 17, 2017
83d9e3c
adds test for query results to_dataframe() with nested schema
alixhami Nov 17, 2017
10fcd7c
updates system test for to_dataframe to check types
alixhami Nov 17, 2017
6762f95
adds to_dataframe() helper to QueryJob
alixhami Nov 18, 2017
0802ca8
updates pandas version to latest version that passes unit tests
alixhami Nov 22, 2017
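
Taken together, these commits add a to_dataframe() method that converts BigQuery query results into a pandas DataFrame, first on QueryJob and later moved to the new RowIterator class, with pandas kept as an optional "pandas" extra. A minimal usage sketch, assuming the client API exercised by the tests in this PR (the query text itself is illustrative, not from the PR):

    from google.cloud import bigquery

    client = bigquery.Client()
    query_job = client.query('SELECT 17 AS num')  # illustrative query
    # result() returns a RowIterator; to_dataframe() requires pandas,
    # installable via the 'pandas' extra these commits add.
    df = query_job.result().to_dataframe()
    print(df['num'][0])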
adds test for query results to_dataframe() with nested schema
alixhami committed Nov 22, 2017
commit 83d9e3c19191235a349e9e8c7e23c3de0d7ce345
49 changes: 49 additions & 0 deletions bigquery/tests/system.py
@@ -1439,6 +1439,55 @@ def test_create_table_rows_fetch_nested_schema(self):
        e_favtime = datetime.datetime(*parts[0:6])
        self.assertEqual(found[7], e_favtime)

    def _fetch_dataframe(self, query):
        return Config.CLIENT.query(query).result().to_dataframe()

    def test_nested_table_to_dataframe(self):
        SF = bigquery.SchemaField
        schema = [
            SF('string_col', 'STRING', mode='NULLABLE'),
            SF('record_col', 'RECORD', mode='NULLABLE', fields=[
                SF('nested_string', 'STRING', mode='NULLABLE'),
                SF('nested_repeated', 'INTEGER', mode='REPEATED'),
                SF('nested_record', 'RECORD', mode='NULLABLE', fields=[
                    SF('nested_nested_string', 'STRING', mode='NULLABLE'),
                ]),
            ]),
        ]
        record = {
            'nested_string': 'another string value',
            'nested_repeated': [0, 1, 2],
            'nested_record': {'nested_nested_string': 'some deep insight'},
        }
        to_insert = [
            ('Some value', record)
        ]
        table_id = 'test_table'
        dataset = self.temp_dataset(_make_dataset_id('nested_df'))
        table_arg = Table(dataset.table(table_id), schema=schema)
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)
        Config.CLIENT.create_rows(table, to_insert)
        QUERY = 'SELECT * from `{}.{}.{}`'.format(
            Config.CLIENT.project, dataset.dataset_id, table_id)

        retry = RetryResult(_has_rows, max_tries=8)
        df = retry(self._fetch_dataframe)(QUERY)

        self.assertIsInstance(df, pandas.DataFrame)
        self.assertEqual(len(df), 1)  # verify the number of rows
        exp_columns = ['string_col', 'record_col']
        self.assertEqual(list(df), exp_columns)  # verify the column names
        row = df.iloc[0]
        # verify the row content
        self.assertEqual(row['string_col'], 'Some value')
        self.assertEqual(row['record_col'], record)
        # verify that nested data can be accessed with indices/keys
        self.assertEqual(row['record_col']['nested_repeated'][0], 0)
        self.assertEqual(
            row['record_col']['nested_record']['nested_nested_string'],
            'some deep insight')

    def temp_dataset(self, dataset_id):
        dataset = retry_403(Config.CLIENT.create_dataset)(
            Dataset(Config.CLIENT.dataset(dataset_id)))
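
Commits 2b8ca85, a31e79d, and 03b7fd5 describe how pandas is kept optional in this series: it is imported at module level under a try/except, and to_dataframe() raises when the import failed. A minimal sketch of that pattern, not the PR's exact code; the exception type and message here are assumptions:

    try:
        import pandas
    except ImportError:  # pragma: NO COVER
        pandas = None


    class RowIterator(object):
        """Iterator over rows of a table or query result (details elided)."""

        def to_dataframe(self):
            if pandas is None:
                # Exact exception type and wording are assumptions.
                raise ValueError('The pandas library is not installed; '
                                 'install the pandas extra to use '
                                 'to_dataframe().')
            # Build the DataFrame from the iterated rows (elided).
            ...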