From 8ffadc84f63c83739bf150b2091158e94a1a5bc3 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:46:49 +0000 Subject: [PATCH] fix: Add boolean type handling and tests Co-Authored-By: alexander@anthropic.com --- .../src/mcp_server_datagen/synthetic.py | 2 ++ src/datagen/tests/unit/test_synthetic.py | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/datagen/src/mcp_server_datagen/synthetic.py b/src/datagen/src/mcp_server_datagen/synthetic.py index 3604c026..b6df6c3f 100644 --- a/src/datagen/src/mcp_server_datagen/synthetic.py +++ b/src/datagen/src/mcp_server_datagen/synthetic.py @@ -342,6 +342,8 @@ class SyntheticDataGenerator: value = self._generate_faker_value("date_time_this_decade") elif col_type == "category": value = np.random.choice(col_spec["categories"]) + elif col_type == "boolean": + value = bool(np.random.choice([True, False])) result[col_name].append(value) diff --git a/src/datagen/tests/unit/test_synthetic.py b/src/datagen/tests/unit/test_synthetic.py index f892ec4e..8d3e3db4 100644 --- a/src/datagen/tests/unit/test_synthetic.py +++ b/src/datagen/tests/unit/test_synthetic.py @@ -201,3 +201,30 @@ async def test_faker_string_generation(data_generator): data = await data_generator.generate_synthetic_data("test", schema, 10) assert all(isinstance(x, str) and x is not None for x in data["legacy_name"]), "Legacy faker format should still work" assert all(len(x) > 0 for x in data["legacy_name"]), "Legacy faker names should not be empty" + + +@pytest.mark.asyncio +async def test_boolean_type_handling(data_generator): + """Test handling of boolean type fields.""" + schema = { + "active": {"type": "boolean"}, + "verified": {"type": "boolean"}, + "premium": {"type": "boolean"} + } + + rows = 100 + data = await data_generator.generate_synthetic_data("test_table", schema, rows) + + # Verify all columns are present + assert set(data.keys()) == set(schema.keys()) + + # Verify number of rows + assert all(len(values) == rows for values in data.values()) + + # Verify boolean fields + for field in ["active", "verified", "premium"]: + assert all(isinstance(x, bool) for x in data[field]), f"{field} values should be booleans" + assert not any(x is None for x in data[field]), f"{field} values should not be null" + # Verify we get both True and False values + assert any(x is True for x in data[field]), f"{field} should have some True values" + assert any(x is False for x in data[field]), f"{field} should have some False values"