From 9743058f7d71da680f10c58b1e5c90843ca38ac0 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 14 Dec 2020 22:19:19 +0100 Subject: [PATCH 1/6] Add a command to clean downloaded images There were a bug in versions prior to 2.4.0 where images weren't properly removed (mostly when coming from the API). With that command, we'll be able to remove images which aren't associated to any entries. Like other command you can pass a username to only clean one user. --- .../Command/CleanDownloadedImagesCommand.php | 123 ++++++++++++++++++ .../CoreBundle/Helper/DownloadImages.php | 50 +++---- 2 files changed, 151 insertions(+), 22 deletions(-) create mode 100644 src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php diff --git a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php new file mode 100644 index 000000000..bb3946210 --- /dev/null +++ b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php @@ -0,0 +1,123 @@ +setName('wallabag:clean-downloaded-images') + ->setDescription('Cleans downloaded images which are no more associated to an entry') + ->addArgument( + 'username', + InputArgument::OPTIONAL, + 'User to clean' + ); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + $this->io = new SymfonyStyle($input, $output); + + $username = $input->getArgument('username'); + + if ($username) { + try { + $user = $this->getContainer()->get('wallabag_user.user_repository')->findOneByUserName($username); + $this->clean($user); + } catch (NoResultException $e) { + $this->io->error(sprintf('User "%s" not found.', $username)); + + return 1; + } + + $this->io->success('Finished cleaning.'); + } else { + $users = $this->getContainer()->get('wallabag_user.user_repository')->findAll(); + + $this->io->text(sprintf('Cleaning through %d user accounts', \count($users))); + + foreach ($users as $user) { + $this->clean($user); + } + $this->io->success(sprintf('Finished cleaning. %d deleted images', $this->deleted)); + } + + return 0; + } + + private function clean(User $user) + { + $this->io->text(sprintf('Processing user %s', $user->getUsername())); + + $repo = $this->getContainer()->get('wallabag_core.entry_repository'); + $downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images'); + $baseFolder = $downloadImages->getBaseFolder(); + + $entries = $repo->findAllEntriesIdByUserId($user->getId()); + + $deletedCount = 0; + + // first retrieve _valid_ folders from existing entries + $hashToId = []; + $validPaths = []; + foreach ($entries as $entry) { + $path = $downloadImages->getRelativePath($entry['id']); + + if (!file_exists($baseFolder . '/' . $path)) { + continue; + } + + // only store the hash, not the full path + $hash = explode('/', $path)[2]; + $validPaths[] = $hash; + $hashToId[$hash] = $entry['id']; + } + + // then retrieve _existing_ folders in the image folder + $finder = new Finder(); + $finder + ->directories() + ->ignoreDotFiles(true) + ->depth(2) + ->in($baseFolder); + + $existingPaths = []; + foreach ($finder as $file) { + $existingPaths[] = $file->getFilename(); + } + + // check if existing path are valid, if not, remove all images and the folder + foreach ($existingPaths as $existingPath) { + if (!\in_array($existingPath, $validPaths, true)) { + $fullPath = $baseFolder . '/' . $existingPath[0] . '/' . $existingPath[1] . '/' . $existingPath; + $res = array_map('unlink', glob($fullPath . '/*.*')); + + rmdir($fullPath); + + $deletedCount += \count($res); + + $this->io->text(sprintf('Deleted images in %s: %d', $existingPath, \count($res))); + } + } + + $this->deleted += $deletedCount; + + $this->io->text(sprintf('Deleted %d images for user %s', $deletedCount, $user->getUserName())); + } +} diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 1d98fd1a9..601b4f314 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -37,6 +37,11 @@ class DownloadImages $this->setFolder(); } + public function getBaseFolder() + { + return $this->baseFolder; + } + /** * Process the html and extract images URLs from it. * @@ -210,6 +215,29 @@ class DownloadImages @rmdir($folderPath); } + /** + * Generate the folder where we are going to save images based on the entry url. + * + * @param int $entryId ID of the entry + * @param bool $createFolder Should we create the folder for the given id? + * + * @return string + */ + public function getRelativePath($entryId, $createFolder = true) + { + $hashId = hash('crc32', $entryId); + $relativePath = $hashId[0] . '/' . $hashId[1] . '/' . $hashId; + $folderPath = $this->baseFolder . '/' . $relativePath; + + if (!file_exists($folderPath) && $createFolder) { + mkdir($folderPath, 0777, true); + } + + $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]); + + return $relativePath; + } + /** * Get images urls from the srcset image attribute. * @@ -254,28 +282,6 @@ class DownloadImages } } - /** - * Generate the folder where we are going to save images based on the entry url. - * - * @param int $entryId ID of the entry - * - * @return string - */ - private function getRelativePath($entryId) - { - $hashId = hash('crc32', $entryId); - $relativePath = $hashId[0] . '/' . $hashId[1] . '/' . $hashId; - $folderPath = $this->baseFolder . '/' . $relativePath; - - if (!file_exists($folderPath)) { - mkdir($folderPath, 0777, true); - } - - $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]); - - return $relativePath; - } - /** * Make an $url absolute based on the $base. * From 9007459605d3272bba05386ea3180b91d1be48f7 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 14 Dec 2020 22:21:09 +0100 Subject: [PATCH 2/6] Fix PHPStan deprecated config option --- phpstan.neon | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/phpstan.neon b/phpstan.neon index e441081c3..e981ab187 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -7,8 +7,7 @@ parameters: symfony: container_xml_path: %rootDir%/../../../var/cache/test/appTestDebugProjectContainer.xml - # https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288 - autoload_files: + bootstrapFiles: - vendor/bin/.phpunit/phpunit-8.3-0/vendor/autoload.php inferPrivatePropertyTypeFromConstructor: true From c7a880079401387243fc30d25b4d902ec2c7264c Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Tue, 15 Dec 2020 10:06:35 +0100 Subject: [PATCH 3/6] Fix phpDoc --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 601b4f314..018bc2c6c 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -104,7 +104,7 @@ class DownloadImages * @param string $url Url from where the image were found * @param string $relativePath Relative local path to saved the image * - * @return string Relative url to access the image from the web + * @return string|false Relative url to access the image from the web */ public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) { From 478c20d3a41708d0371cfb33befb47b4f2d51fe4 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 16 Dec 2020 16:54:17 +0100 Subject: [PATCH 4/6] Remove username argument and add a dry-run option --- .../Command/CleanDownloadedImagesCommand.php | 55 +++++++++---------- 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php index bb3946210..ae6a3accc 100644 --- a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php +++ b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php @@ -2,10 +2,9 @@ namespace Wallabag\CoreBundle\Command; -use Doctrine\ORM\NoResultException; use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; -use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Style\SymfonyStyle; use Symfony\Component\Finder\Finder; @@ -23,10 +22,11 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand $this ->setName('wallabag:clean-downloaded-images') ->setDescription('Cleans downloaded images which are no more associated to an entry') - ->addArgument( - 'username', - InputArgument::OPTIONAL, - 'User to clean' + ->addOption( + 'dry-run', + null, + InputOption::VALUE_NONE, + 'Do not remove images, just dump counters' ); } @@ -34,34 +34,26 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand { $this->io = new SymfonyStyle($input, $output); - $username = $input->getArgument('username'); + $dryRun = (bool) $input->getOption('dry-run'); - if ($username) { - try { - $user = $this->getContainer()->get('wallabag_user.user_repository')->findOneByUserName($username); - $this->clean($user); - } catch (NoResultException $e) { - $this->io->error(sprintf('User "%s" not found.', $username)); + $users = $this->getContainer()->get('wallabag_user.user_repository')->findAll(); - return 1; - } + $this->io->text(sprintf('Cleaning through %d user accounts', \count($users))); - $this->io->success('Finished cleaning.'); - } else { - $users = $this->getContainer()->get('wallabag_user.user_repository')->findAll(); - - $this->io->text(sprintf('Cleaning through %d user accounts', \count($users))); - - foreach ($users as $user) { - $this->clean($user); - } - $this->io->success(sprintf('Finished cleaning. %d deleted images', $this->deleted)); + if ($dryRun) { + $this->io->text('Dry run mode enabled (no images will be removed)'); } + foreach ($users as $user) { + $this->clean($user, $dryRun); + } + + $this->io->success(sprintf('Finished cleaning. %d deleted images', $this->deleted)); + return 0; } - private function clean(User $user) + private function clean(User $user, bool $dryRun) { $this->io->text(sprintf('Processing user %s', $user->getUsername())); @@ -106,13 +98,16 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand foreach ($existingPaths as $existingPath) { if (!\in_array($existingPath, $validPaths, true)) { $fullPath = $baseFolder . '/' . $existingPath[0] . '/' . $existingPath[1] . '/' . $existingPath; - $res = array_map('unlink', glob($fullPath . '/*.*')); + $files = glob($fullPath . '/*.*'); - rmdir($fullPath); + if (!$dryRun) { + array_map('unlink', $files); + rmdir($fullPath); + } - $deletedCount += \count($res); + $deletedCount += \count($files); - $this->io->text(sprintf('Deleted images in %s: %d', $existingPath, \count($res))); + $this->io->text(sprintf('Deleted images in %s: %d', $existingPath, \count($files))); } } From 5437f0e3da7a07372195d24c76b338da10d75a18 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 16 Dec 2020 17:26:13 +0100 Subject: [PATCH 5/6] Build images list outside of `clean` --- .../Command/CleanDownloadedImagesCommand.php | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php index ae6a3accc..de5cfc509 100644 --- a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php +++ b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php @@ -8,14 +8,16 @@ use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Style\SymfonyStyle; use Symfony\Component\Finder\Finder; +use Wallabag\CoreBundle\Helper\DownloadImages; use Wallabag\UserBundle\Entity\User; class CleanDownloadedImagesCommand extends ContainerAwareCommand { /** @var SymfonyStyle */ protected $io; - protected $deleted = 0; + /** @var DownloadImages */ + protected $downloadImages; protected function configure() { @@ -44,8 +46,23 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand $this->io->text('Dry run mode enabled (no images will be removed)'); } + $this->downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images'); + + // retrieve _existing_ folders in the image folder + $finder = new Finder(); + $finder + ->directories() + ->ignoreDotFiles(true) + ->depth(2) + ->in($this->downloadImages->getBaseFolder()); + + $existingPaths = []; + foreach ($finder as $file) { + $existingPaths[] = $file->getFilename(); + } + foreach ($users as $user) { - $this->clean($user, $dryRun); + $this->clean($user, $existingPaths, $dryRun); } $this->io->success(sprintf('Finished cleaning. %d deleted images', $this->deleted)); @@ -53,45 +70,27 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand return 0; } - private function clean(User $user, bool $dryRun) + private function clean(User $user, array $existingPaths, bool $dryRun) { $this->io->text(sprintf('Processing user %s', $user->getUsername())); $repo = $this->getContainer()->get('wallabag_core.entry_repository'); - $downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images'); - $baseFolder = $downloadImages->getBaseFolder(); - + $baseFolder = $this->downloadImages->getBaseFolder(); $entries = $repo->findAllEntriesIdByUserId($user->getId()); $deletedCount = 0; - // first retrieve _valid_ folders from existing entries - $hashToId = []; + // retrieve _valid_ folders from existing entries $validPaths = []; foreach ($entries as $entry) { - $path = $downloadImages->getRelativePath($entry['id']); + $path = $this->downloadImages->getRelativePath($entry['id']); if (!file_exists($baseFolder . '/' . $path)) { continue; } // only store the hash, not the full path - $hash = explode('/', $path)[2]; - $validPaths[] = $hash; - $hashToId[$hash] = $entry['id']; - } - - // then retrieve _existing_ folders in the image folder - $finder = new Finder(); - $finder - ->directories() - ->ignoreDotFiles(true) - ->depth(2) - ->in($baseFolder); - - $existingPaths = []; - foreach ($finder as $file) { - $existingPaths[] = $file->getFilename(); + $validPaths[] = explode('/', $path)[2]; } // check if existing path are valid, if not, remove all images and the folder From fb4d1fd10b9c757512b79dcd2491f5882b0ae303 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 16 Dec 2020 17:45:28 +0100 Subject: [PATCH 6/6] Compare full list to full list --- .../Command/CleanDownloadedImagesCommand.php | 56 +++++++------------ 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php index de5cfc509..d81becdc3 100644 --- a/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php +++ b/src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php @@ -8,17 +8,9 @@ use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Style\SymfonyStyle; use Symfony\Component\Finder\Finder; -use Wallabag\CoreBundle\Helper\DownloadImages; -use Wallabag\UserBundle\Entity\User; class CleanDownloadedImagesCommand extends ContainerAwareCommand { - /** @var SymfonyStyle */ - protected $io; - protected $deleted = 0; - /** @var DownloadImages */ - protected $downloadImages; - protected function configure() { $this @@ -34,19 +26,18 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand protected function execute(InputInterface $input, OutputInterface $output) { - $this->io = new SymfonyStyle($input, $output); + $io = new SymfonyStyle($input, $output); $dryRun = (bool) $input->getOption('dry-run'); - $users = $this->getContainer()->get('wallabag_user.user_repository')->findAll(); - - $this->io->text(sprintf('Cleaning through %d user accounts', \count($users))); - if ($dryRun) { - $this->io->text('Dry run mode enabled (no images will be removed)'); + $io->text('Dry run mode enabled (no images will be removed)'); } - $this->downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images'); + $downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images'); + $baseFolder = $downloadImages->getBaseFolder(); + + $io->text('Retrieve existing images'); // retrieve _existing_ folders in the image folder $finder = new Finder(); @@ -54,36 +45,23 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand ->directories() ->ignoreDotFiles(true) ->depth(2) - ->in($this->downloadImages->getBaseFolder()); + ->in($baseFolder); $existingPaths = []; foreach ($finder as $file) { $existingPaths[] = $file->getFilename(); } - foreach ($users as $user) { - $this->clean($user, $existingPaths, $dryRun); - } + $io->text(sprintf(' -> %d images found', \count($existingPaths))); - $this->io->success(sprintf('Finished cleaning. %d deleted images', $this->deleted)); + $io->text('Retrieve valid folders attached to a user'); - return 0; - } - - private function clean(User $user, array $existingPaths, bool $dryRun) - { - $this->io->text(sprintf('Processing user %s', $user->getUsername())); - - $repo = $this->getContainer()->get('wallabag_core.entry_repository'); - $baseFolder = $this->downloadImages->getBaseFolder(); - $entries = $repo->findAllEntriesIdByUserId($user->getId()); - - $deletedCount = 0; + $entries = $this->getContainer()->get('wallabag_core.entry_repository')->findAllEntriesIdByUserId(); // retrieve _valid_ folders from existing entries $validPaths = []; foreach ($entries as $entry) { - $path = $this->downloadImages->getRelativePath($entry['id']); + $path = $downloadImages->getRelativePath($entry['id']); if (!file_exists($baseFolder . '/' . $path)) { continue; @@ -93,6 +71,12 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand $validPaths[] = explode('/', $path)[2]; } + $io->text(sprintf(' -> %d folders found', \count($validPaths))); + + $deletedCount = 0; + + $io->text('Remove images'); + // check if existing path are valid, if not, remove all images and the folder foreach ($existingPaths as $existingPath) { if (!\in_array($existingPath, $validPaths, true)) { @@ -106,12 +90,12 @@ class CleanDownloadedImagesCommand extends ContainerAwareCommand $deletedCount += \count($files); - $this->io->text(sprintf('Deleted images in %s: %d', $existingPath, \count($files))); + $io->text(sprintf('Deleted images in %s: %d', $existingPath, \count($files))); } } - $this->deleted += $deletedCount; + $io->success(sprintf('Finished cleaning. %d deleted images', $deletedCount)); - $this->io->text(sprintf('Deleted %d images for user %s', $deletedCount, $user->getUserName())); + return 0; } }