From 5a6d9180fed81a30cb91ef3fed30176cd4402116 Mon Sep 17 00:00:00 2001
From: JPVenson
Date: Thu, 25 Sep 2025 00:20:30 +0300
Subject: Add People Dedup and multiple progress fixes (#14848)

---
 .../ScheduledTasks/Tasks/PeopleValidationTask.cs | 66 +++++++++++++++++++++-
 1 file changed, 63 insertions(+), 3 deletions(-)

(limited to 'Emby.Server.Implementations/ScheduledTasks')

diff --git a/Emby.Server.Implementations/ScheduledTasks/Tasks/PeopleValidationTask.cs b/Emby.Server.Implementations/ScheduledTasks/Tasks/PeopleValidationTask.cs
index 18162ad2fc..6e4e5c7808 100644
--- a/Emby.Server.Implementations/ScheduledTasks/Tasks/PeopleValidationTask.cs
+++ b/Emby.Server.Implementations/ScheduledTasks/Tasks/PeopleValidationTask.cs
@@ -1,10 +1,14 @@
 using System;
+using System.Buffers;
 using System.Collections.Generic;
+using System.Linq;
 using System.Threading;
 using System.Threading.Tasks;
+using Jellyfin.Database.Implementations;
 using MediaBrowser.Controller.Library;
 using MediaBrowser.Model.Globalization;
 using MediaBrowser.Model.Tasks;
+using Microsoft.EntityFrameworkCore;

 namespace Emby.Server.Implementations.ScheduledTasks.Tasks;

@@ -15,16 +19,19 @@ public class PeopleValidationTask : IScheduledTask, IConfigurableScheduledTask
 {
     private readonly ILibraryManager _libraryManager;
     private readonly ILocalizationManager _localization;
+    private readonly IDbContextFactory _dbContextFactory;

     ///
     /// Initializes a new instance of the class.
     ///
     /// Instance of the interface.
     /// Instance of the interface.
-    public PeopleValidationTask(ILibraryManager libraryManager, ILocalizationManager localization)
+    /// Instance of the interface.
+    public PeopleValidationTask(ILibraryManager libraryManager, ILocalizationManager localization, IDbContextFactory dbContextFactory)
     {
         _libraryManager = libraryManager;
         _localization = localization;
+        _dbContextFactory = dbContextFactory;
     }

     ///
@@ -62,8 +69,61 @@ public class PeopleValidationTask : IScheduledTask, IConfigurableScheduledTask
     }

     ///
-    public Task ExecuteAsync(IProgress progress, CancellationToken cancellationToken)
+    public async Task ExecuteAsync(IProgress progress, CancellationToken cancellationToken)
     {
-        return _libraryManager.ValidatePeopleAsync(progress, cancellationToken);
+        IProgress subProgress = new Progress((val) => progress.Report(val / 2));
+        await _libraryManager.ValidatePeopleAsync(subProgress, cancellationToken).ConfigureAwait(false);
+
+        subProgress = new Progress((val) => progress.Report((val / 2) + 50));
+        var context = await _dbContextFactory.CreateDbContextAsync(cancellationToken).ConfigureAwait(false);
+        await using (context.ConfigureAwait(false))
+        {
+            var dupQuery = context.Peoples
+                .GroupBy(e => new { e.Name, e.PersonType })
+                .Where(e => e.Count() > 1)
+                .Select(e => e.Select(f => f.Id).ToArray());
+
+            var total = dupQuery.Count();
+
+            const int PartitionSize = 100;
+            var iterator = 0;
+            int itemCounter;
+            var buffer = ArrayPool.Shared.Rent(PartitionSize)!;
+            try
+            {
+                do
+                {
+                    itemCounter = 0;
+                    await foreach (var item in dupQuery
+                        .Take(PartitionSize)
+                        .AsAsyncEnumerable()
+                        .WithCancellation(cancellationToken)
+                        .ConfigureAwait(false))
+                    {
+                        buffer[itemCounter++] = item;
+                    }
+
+                    for (int i = 0; i < itemCounter; i++)
+                    {
+                        var item = buffer[i];
+                        var reference = item[0];
+                        var dups = item[1..];
+                        await context.PeopleBaseItemMap.WhereOneOrMany(dups, e => e.PeopleId)
+                            .ExecuteUpdateAsync(e => e.SetProperty(f => f.PeopleId, reference), cancellationToken)
+                            .ConfigureAwait(false);
+                        await context.Peoples.Where(e => dups.Contains(e.Id)).ExecuteDeleteAsync(cancellationToken).ConfigureAwait(false);
+                        subProgress.Report(100f / total * ((iterator * PartitionSize) + i));
+                    }
+
+                    iterator++;
+                } while (itemCounter == PartitionSize && !cancellationToken.IsCancellationRequested);
+            }
+            finally
+            {
+                ArrayPool.Shared.Return(buffer);
+            }
+
+            subProgress.Report(100);
+        }
     }
 }
--
cgit v1.2.3