# Patch summary (commentary only; everything before the first "diff --git"
# header is not part of any hunk).
#
# 1. synthetic.py, inside class SyntheticDataGenerator: adds a branch for
#    column specs whose type is "boolean". Each value is drawn with
#    np.random.choice([True, False]) and wrapped in bool(), so the appended
#    value is a built-in bool rather than a NumPy scalar; the new test's
#    isinstance(x, bool) assertion depends on that conversion.
#
# 2. test_synthetic.py: adds test_boolean_type_handling, which generates 100
#    rows for three "boolean" columns and asserts that the returned keys match
#    the schema, every column has 100 values, every value is a bool and not
#    None, and each column contains at least one True and at least one False.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy.
# Line breaks follow the hunk headers' line counts, but the leading
# indentation of the synthetic.py lines in the first hunk is assumed (the
# original columns are not recoverable here) -- confirm against the target
# file before applying, or apply with whitespace-tolerant options.
diff --git a/src/datagen/src/mcp_server_datagen/synthetic.py b/src/datagen/src/mcp_server_datagen/synthetic.py
index 3604c026..b6df6c3f 100644
--- a/src/datagen/src/mcp_server_datagen/synthetic.py
+++ b/src/datagen/src/mcp_server_datagen/synthetic.py
@@ -342,6 +342,8 @@ class SyntheticDataGenerator:
                         value = self._generate_faker_value("date_time_this_decade")
                     elif col_type == "category":
                         value = np.random.choice(col_spec["categories"])
+                    elif col_type == "boolean":
+                        value = bool(np.random.choice([True, False]))
 
                     result[col_name].append(value)
 
diff --git a/src/datagen/tests/unit/test_synthetic.py b/src/datagen/tests/unit/test_synthetic.py
index f892ec4e..8d3e3db4 100644
--- a/src/datagen/tests/unit/test_synthetic.py
+++ b/src/datagen/tests/unit/test_synthetic.py
@@ -201,3 +201,30 @@ async def test_faker_string_generation(data_generator):
     data = await data_generator.generate_synthetic_data("test", schema, 10)
     assert all(isinstance(x, str) and x is not None for x in data["legacy_name"]), "Legacy faker format should still work"
     assert all(len(x) > 0 for x in data["legacy_name"]), "Legacy faker names should not be empty"
+
+
+@pytest.mark.asyncio
+async def test_boolean_type_handling(data_generator):
+    """Test handling of boolean type fields."""
+    schema = {
+        "active": {"type": "boolean"},
+        "verified": {"type": "boolean"},
+        "premium": {"type": "boolean"}
+    }
+
+    rows = 100
+    data = await data_generator.generate_synthetic_data("test_table", schema, rows)
+
+    # Verify all columns are present
+    assert set(data.keys()) == set(schema.keys())
+
+    # Verify number of rows
+    assert all(len(values) == rows for values in data.values())
+
+    # Verify boolean fields
+    for field in ["active", "verified", "premium"]:
+        assert all(isinstance(x, bool) for x in data[field]), f"{field} values should be booleans"
+        assert not any(x is None for x in data[field]), f"{field} values should not be null"
+        # Verify we get both True and False values
+        assert any(x is True for x in data[field]), f"{field} should have some True values"
+        assert any(x is False for x in data[field]), f"{field} should have some False values"