Skip to content

Commit ba162f4

Browse files
committed
Getting MultiShard GraphSize Optimization together
1 parent 5cbdae6 commit ba162f4

File tree

3 files changed

+151
-108
lines changed

3 files changed

+151
-108
lines changed

v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapper.java

Lines changed: 101 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import com.google.common.collect.ImmutableList;
5050
import com.google.common.collect.ImmutableMap;
5151
import com.google.common.collect.ImmutableSet;
52+
import com.google.common.collect.Lists;
5253
import java.sql.SQLException;
5354
import java.util.List;
5455
import java.util.concurrent.ExecutionException;
@@ -638,42 +639,47 @@ private static PTransform<PBegin, PCollection<SourceRow>> getJdbcIO(
638639

639640
/* Todo in subsequent PR for multishard graphsize support, pass this to table reader. */
640641
DataSourceProvider dataSourceProvider = getDataSourceProvider(perSourceDiscoveries);
641-
for (PerSourceDiscovery perSourceDiscovery : perSourceDiscoveries) {
642-
ImmutableList.Builder<SourceTableReference> tableReferencesBuilder = ImmutableList.builder();
643-
ImmutableList.Builder<TableSplitSpecification> splitSpecsBuilder = ImmutableList.builder();
644-
ImmutableMap.Builder<TableIdentifier, TableReadSpecification<SourceRow>> readSpecsBuilder =
645-
ImmutableMap.builder();
646-
JdbcIOWrapperConfig config = perSourceDiscovery.config();
647-
DataSourceConfiguration dataSourceConfiguration =
648-
perSourceDiscovery.dataSourceConfiguration();
649-
ImmutableList<TableConfig> tableConfigs = perSourceDiscovery.tableConfigs();
650-
651-
if (!config.readWithUniformPartitionsFeatureEnabled() || tableConfigs.isEmpty()) {
652-
continue;
653-
}
654-
accumulateSpecs(
655-
perSourceDiscovery, tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
656-
657-
ReadWithUniformPartitions<SourceRow> readWithUniformPartitions =
658-
ReadWithUniformPartitions.<SourceRow>builder()
659-
.setTableSplitSpecifications(splitSpecsBuilder.build())
660-
.setTableReadSpecifications(readSpecsBuilder.build())
661-
.setDataSourceProviderFn(
662-
JdbcIO.PoolableDataSourceProvider.of(dataSourceConfiguration))
663-
.setDbAdapter(dialectAdapter)
664-
.setWaitOn(waitOn)
665-
.setDbParallelizationForSplitProcess(dbParallelizationForSplitProcess)
666-
.setDbParallelizationForReads(dbParallelizationForReads)
667-
.setAdditionalOperationsOnRanges(additionalOperationsOnRanges)
668-
.build();
669-
670-
LOG.info(
671-
"Configured Multi-Table ReadWithUniformPartitions {} for tables {} with config {}",
672-
readWithUniformPartitions,
673-
tableConfigs.stream().map(TableConfig::tableName).collect(Collectors.toList()),
674-
config);
675-
tableReadersBuilder.put(tableReferencesBuilder.build(), readWithUniformPartitions);
642+
ImmutableList.Builder<SourceTableReference> tableReferencesBuilder = ImmutableList.builder();
643+
ImmutableList.Builder<TableSplitSpecification> splitSpecsBuilder = ImmutableList.builder();
644+
ImmutableMap.Builder<TableIdentifier, TableReadSpecification<SourceRow>> readSpecsBuilder =
645+
ImmutableMap.builder();
646+
accumulateSpecs(
647+
perSourceDiscoveries, tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
648+
649+
ImmutableList<SourceTableReference> tableReferences = tableReferencesBuilder.build();
650+
if (tableReferences.isEmpty()) {
651+
return ImmutableMap.of();
676652
}
653+
654+
ReadWithUniformPartitions<SourceRow> readWithUniformPartitions =
655+
ReadWithUniformPartitions.<SourceRow>builder()
656+
.setTableSplitSpecifications(splitSpecsBuilder.build())
657+
.setTableReadSpecifications(readSpecsBuilder.build())
658+
.setDataSourceProvider(dataSourceProvider)
659+
.setDbAdapter(dialectAdapter)
660+
.setWaitOn(waitOn)
661+
.setDbParallelizationForSplitProcess(dbParallelizationForSplitProcess)
662+
.setDbParallelizationForReads(dbParallelizationForReads)
663+
.setAdditionalOperationsOnRanges(additionalOperationsOnRanges)
664+
.build();
665+
666+
Lists.partition(perSourceDiscoveries, 50)
667+
.forEach(
668+
batch ->
669+
LOG.info(
670+
"Configured Multi-Table ReadWithUniformPartitions for sources batch: {}",
671+
batch.stream()
672+
.map(
673+
d ->
674+
"id="
675+
+ d.config().id()
676+
+ ":"
677+
+ "shard_id="
678+
+ d.config().shardID()
679+
+ ":'"
680+
+ d.sourceSchema().schemaReference().jdbc().toString())
681+
.collect(Collectors.joining(","))));
682+
tableReadersBuilder.put(tableReferences, readWithUniformPartitions);
677683
return tableReadersBuilder.build();
678684
}
679685

@@ -699,58 +705,71 @@ private static PTransform<PBegin, PCollection<SourceRow>> getJdbcIO(
699705
/* Todo in subsequent PR for multishard graphsize support accumulate specs across a list of sourceDiscovereies */
700706
@VisibleForTesting
701707
protected static void accumulateSpecs(
702-
PerSourceDiscovery perSourceDiscovery,
708+
ImmutableList<PerSourceDiscovery> perSourceDiscoveries,
703709
ImmutableList.Builder<SourceTableReference> tableReferencesBuilder,
704710
ImmutableList.Builder<TableSplitSpecification> splitSpecsBuilder,
705711
ImmutableMap.Builder<TableIdentifier, TableReadSpecification<SourceRow>> readSpecsBuilder) {
706712

707-
JdbcIOWrapperConfig config = perSourceDiscovery.config();
708-
SourceSchemaReference sourceSchemaReference =
709-
perSourceDiscovery.sourceSchema().schemaReference();
710-
ImmutableList<TableConfig> tableConfigs = perSourceDiscovery.tableConfigs();
711-
for (TableConfig tableConfig : tableConfigs) {
712-
SourceTableSchema sourceTableSchema =
713-
findSourceTableSchema(perSourceDiscovery.sourceSchema(), tableConfig);
714-
int fetchSize = getFetchSize(config, tableConfig, sourceTableSchema);
715-
TableIdentifier tableIdentifier = getTableIdentifier(tableConfig);
716-
717-
TableSplitSpecification.Builder tableSplitSpecificationBuilder =
718-
TableSplitSpecification.builder()
719-
.setTableIdentifier(tableIdentifier)
720-
.setPartitionColumns(tableConfig.partitionColumns())
721-
.setApproxRowCount(tableConfig.approxRowCount());
722-
if (tableConfig.maxPartitions() != null) {
723-
tableSplitSpecificationBuilder =
724-
tableSplitSpecificationBuilder.setMaxPartitionsHint((long) tableConfig.maxPartitions());
725-
}
726-
if (config.splitStageCountHint() >= 0) {
727-
tableSplitSpecificationBuilder =
728-
tableSplitSpecificationBuilder.setSplitStagesCount((long) config.splitStageCountHint());
729-
}
730-
splitSpecsBuilder.add(tableSplitSpecificationBuilder.build());
731-
732-
TableReadSpecification.Builder<SourceRow> tableReadSpecificationBuilder =
733-
TableReadSpecification.<SourceRow>builder()
734-
.setFetchSize(fetchSize)
735-
.setTableIdentifier(tableIdentifier)
736-
.setRowMapper(
737-
new JdbcSourceRowMapper(
738-
config.valueMappingsProvider(),
739-
sourceSchemaReference,
740-
sourceTableSchema,
741-
config.shardID()));
742-
if (config.maxFetchSize() != null) {
743-
tableReadSpecificationBuilder =
744-
tableReadSpecificationBuilder.setFetchSize(config.maxFetchSize());
713+
for (PerSourceDiscovery perSourceDiscovery : perSourceDiscoveries) {
714+
JdbcIOWrapperConfig config = perSourceDiscovery.config();
715+
SourceSchemaReference sourceSchemaReference =
716+
perSourceDiscovery.sourceSchema().schemaReference();
717+
ImmutableList<TableConfig> tableConfigs = perSourceDiscovery.tableConfigs();
718+
for (TableConfig tableConfig : tableConfigs) {
719+
720+
if (!config.readWithUniformPartitionsFeatureEnabled() || tableConfigs.isEmpty()) {
721+
continue;
722+
}
723+
SourceTableSchema sourceTableSchema =
724+
findSourceTableSchema(perSourceDiscovery.sourceSchema(), tableConfig);
725+
int fetchSize = getFetchSize(config, tableConfig, sourceTableSchema);
726+
TableIdentifier tableIdentifier = getTableIdentifier(tableConfig);
727+
728+
TableSplitSpecification.Builder tableSplitSpecificationBuilder =
729+
TableSplitSpecification.builder()
730+
.setTableIdentifier(tableIdentifier)
731+
.setPartitionColumns(tableConfig.partitionColumns())
732+
.setApproxRowCount(tableConfig.approxRowCount());
733+
if (tableConfig.maxPartitions() != null) {
734+
tableSplitSpecificationBuilder =
735+
tableSplitSpecificationBuilder.setMaxPartitionsHint(
736+
(long) tableConfig.maxPartitions());
737+
}
738+
if (config.splitStageCountHint() >= 0) {
739+
tableSplitSpecificationBuilder =
740+
tableSplitSpecificationBuilder.setSplitStagesCount(
741+
(long) config.splitStageCountHint());
742+
}
743+
splitSpecsBuilder.add(tableSplitSpecificationBuilder.build());
744+
745+
TableReadSpecification.Builder<SourceRow> tableReadSpecificationBuilder =
746+
TableReadSpecification.<SourceRow>builder()
747+
.setFetchSize(fetchSize)
748+
.setTableIdentifier(tableIdentifier)
749+
.setRowMapper(
750+
new JdbcSourceRowMapper(
751+
config.valueMappingsProvider(),
752+
sourceSchemaReference,
753+
sourceTableSchema,
754+
config.shardID()));
755+
if (config.maxFetchSize() != null) {
756+
tableReadSpecificationBuilder =
757+
tableReadSpecificationBuilder.setFetchSize(config.maxFetchSize());
758+
}
759+
readSpecsBuilder.put(tableIdentifier, tableReadSpecificationBuilder.build());
760+
761+
tableReferencesBuilder.add(
762+
SourceTableReference.builder()
763+
.setSourceSchemaReference(sourceSchemaReference)
764+
.setSourceTableName(delimitIdentifier(sourceTableSchema.tableName()))
765+
.setSourceTableSchemaUUID(sourceTableSchema.tableSchemaUUID())
766+
.build());
767+
LOG.info(
768+
"Configuring Multi-Table ReadWithUniformPartitions for source-id {} tables {} with config {}",
769+
config.id(),
770+
tableConfigs.stream().map(TableConfig::tableName).collect(Collectors.toList()),
771+
config);
745772
}
746-
readSpecsBuilder.put(tableIdentifier, tableReadSpecificationBuilder.build());
747-
748-
tableReferencesBuilder.add(
749-
SourceTableReference.builder()
750-
.setSourceSchemaReference(sourceSchemaReference)
751-
.setSourceTableName(delimitIdentifier(sourceTableSchema.tableName()))
752-
.setSourceTableSchemaUUID(sourceTableSchema.tableSchemaUUID())
753-
.build());
754773
}
755774
}
756775

v2/sourcedb-to-spanner/src/main/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/uniformsplitter/transforms/ReadWithUniformPartitions.java

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import com.google.auto.value.AutoValue;
1919
import com.google.auto.value.extension.memoized.Memoized;
2020
import com.google.cloud.teleport.v2.source.reader.io.jdbc.uniformsplitter.DataSourceProvider;
21-
import com.google.cloud.teleport.v2.source.reader.io.jdbc.uniformsplitter.DataSourceProviderImpl;
2221
import com.google.cloud.teleport.v2.source.reader.io.jdbc.uniformsplitter.UniformSplitterDBAdapter;
2322
import com.google.cloud.teleport.v2.source.reader.io.jdbc.uniformsplitter.columnboundary.ColumnForBoundaryQuery;
2423
import com.google.cloud.teleport.v2.source.reader.io.jdbc.uniformsplitter.range.BoundaryTypeMapper;
@@ -569,15 +568,6 @@ public abstract static class Builder<T> {
569568

570569
public abstract Builder<T> setDataSourceProvider(DataSourceProvider value);
571570

572-
// TODO Remove in subsequent PR
573-
private SerializableFunction<Void, DataSource> dataSourceProviderSerializableFunction = null;
574-
575-
// TODO Remove in subsequent PR
576-
public Builder<T> setDataSourceProviderFn(SerializableFunction<Void, DataSource> value) {
577-
this.dataSourceProviderSerializableFunction = value;
578-
return this;
579-
}
580-
581571
public abstract Builder<T> setDbAdapter(UniformSplitterDBAdapter value);
582572

583573
public abstract Builder<T> setCountQueryTimeoutMillis(long value);
@@ -626,15 +616,6 @@ protected String getTransformPrefix() {
626616
}
627617

628618
public ReadWithUniformPartitions<T> build() {
629-
// TODO Remove in subsequent PR
630-
if (this.dataSourceProviderSerializableFunction != null) {
631-
this.setDataSourceProvider(
632-
DataSourceProviderImpl.builder()
633-
.addDataSource(
634-
this.tableSplitSpecifications().get(0).tableIdentifier().dataSourceId(),
635-
this.dataSourceProviderSerializableFunction)
636-
.build());
637-
}
638619
validateParameters();
639620

640621
java.util.Set<TableIdentifier> splitIdentifiers =

v2/sourcedb-to-spanner/src/test/java/com/google/cloud/teleport/v2/source/reader/io/jdbc/iowrapper/JdbcIoWrapperTest.java

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,19 +1009,19 @@ public void testBuildTableReaders() {
10091009

10101010
// Assert
10111011
// Legacy: shard2.t2 (1), shard3.t3a (1), shard3.t3b (1) = 3 transforms
1012-
// Uniform: shard5 (1), shard6 (1) = 2 transforms
1013-
// Total = 5 entries
1014-
assertThat(tableReaders).hasSize(5);
1012+
// Uniform: shard5 (1), shard6 (1) = 1 transform (combined)
1013+
// Total = 4 entries
1014+
assertThat(tableReaders).hasSize(4);
10151015

10161016
// Verify Legacy readers
10171017
long legacyCount =
10181018
tableReaders.values().stream().filter(v -> v instanceof JdbcIO.ReadWithPartitions).count();
10191019
assertThat(legacyCount).isEqualTo(3);
10201020

1021-
// Verify Uniform readers
1021+
// Verify that we have one combined Uniform reader
10221022
long uniformCount =
10231023
tableReaders.values().stream().filter(v -> v instanceof ReadWithUniformPartitions).count();
1024-
assertThat(uniformCount).isEqualTo(2);
1024+
assertThat(uniformCount).isEqualTo(1);
10251025

10261026
// Verify table names in keys
10271027
java.util.List<String> allTableNames =
@@ -1095,6 +1095,10 @@ public void testGetPerSourceDiscoveries() throws RetriableSchemaDiscoveryExcepti
10951095
.containsNoDuplicates();
10961096
}
10971097

1098+
/**
1099+
* Tests that {@link JdbcIoWrapper#getPerSourceDiscoveries} correctly propagates exceptions when
1100+
* schema discovery fails for a shard.
1101+
*/
10981102
@Test
10991103
public void testGetPerSourceDiscoveries_Fails() throws RetriableSchemaDiscoveryException {
11001104
SourceSchemaReference schemaRef =
@@ -1122,6 +1126,10 @@ public void testGetPerSourceDiscoveries_Fails() throws RetriableSchemaDiscoveryE
11221126
assertThrows(RuntimeException.class, () -> JdbcIoWrapper.getPerSourceDiscoveries(group));
11231127
}
11241128

1129+
/**
1130+
* Tests that {@link JdbcIoWrapper#getPerSourceDiscoveries} logs a warning and continues when
1131+
* closing a data source fails after discovery.
1132+
*/
11251133
@Test
11261134
public void testGetPerSourceDiscovery_LogsWarning_WhenCloseFails() throws Exception {
11271135
SourceSchemaReference schemaRef =
@@ -1304,7 +1312,7 @@ public void testAccumulateSpecs() {
13041312
ImmutableMap.builder();
13051313

13061314
JdbcIoWrapper.accumulateSpecs(
1307-
discovery, tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
1315+
ImmutableList.of(discovery), tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
13081316

13091317
ImmutableList<SourceTableReference> tableRefs = tableReferencesBuilder.build();
13101318
ImmutableList<TableSplitSpecification> splitSpecs = splitSpecsBuilder.build();
@@ -1357,7 +1365,7 @@ public void testAccumulateSpecs_Empty() {
13571365
ImmutableMap.builder();
13581366

13591367
JdbcIoWrapper.accumulateSpecs(
1360-
discovery, tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
1368+
ImmutableList.of(discovery), tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
13611369

13621370
assertThat(tableReferencesBuilder.build()).isEmpty();
13631371
assertThat(splitSpecsBuilder.build()).isEmpty();
@@ -1368,6 +1376,41 @@ public void testAccumulateSpecs_Empty() {
13681376
* Tests that {@link JdbcIoWrapper#accumulateSpecs} does not generate split specifications when
13691377
* the uniform partitions feature is disabled.
13701378
*/
1379+
@Test
1380+
public void testAccumulateSpecs_FeatureDisabled() {
1381+
SourceSchemaReference schemaRef =
1382+
SourceSchemaReference.ofJdbc(JdbcSchemaReference.builder().setDbName("testDB").build());
1383+
JdbcIOWrapperConfig config =
1384+
JdbcIOWrapperConfig.builderWithMySqlDefaults()
1385+
.setReadWithUniformPartitionsFeatureEnabled(false)
1386+
.setSourceDbURL("jdbc:mysql://localhost/test")
1387+
.setDbAuth(
1388+
LocalCredentialsProvider.builder().setUserName("user").setPassword("pass").build())
1389+
.setJdbcDriverJars("")
1390+
.setJdbcDriverClassName("com.mysql.cj.jdbc.Driver")
1391+
.setSourceSchemaReference(schemaRef)
1392+
.build();
1393+
TableConfig tableConfig = TableConfig.builder("testTable").setDataSourceId("shard1").build();
1394+
JdbcIoWrapper.PerSourceDiscovery discovery =
1395+
JdbcIoWrapper.PerSourceDiscovery.builder()
1396+
.setConfig(config)
1397+
.setDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create("c", "u"))
1398+
.setTableConfigs(ImmutableList.of(tableConfig))
1399+
.setSourceSchema(SourceSchema.builder().setSchemaReference(schemaRef).build())
1400+
.build();
1401+
1402+
ImmutableList.Builder<SourceTableReference> tableReferencesBuilder = ImmutableList.builder();
1403+
ImmutableList.Builder<TableSplitSpecification> splitSpecsBuilder = ImmutableList.builder();
1404+
ImmutableMap.Builder<TableIdentifier, TableReadSpecification<SourceRow>> readSpecsBuilder =
1405+
ImmutableMap.builder();
1406+
1407+
JdbcIoWrapper.accumulateSpecs(
1408+
ImmutableList.of(discovery), tableReferencesBuilder, splitSpecsBuilder, readSpecsBuilder);
1409+
1410+
assertThat(splitSpecsBuilder.build()).isEmpty();
1411+
assertThat(readSpecsBuilder.build()).isEmpty();
1412+
}
1413+
13711414
@Test
13721415
public void testGetDataSourceProvider() {
13731416
JdbcIOWrapperConfig config1 =

0 commit comments

Comments
 (0)