mirror of
https://github.com/wallabag/wallabag.git
synced 2024-12-17 21:26:27 +00:00
Merge pull request #4844 from wallabag/feature/add-clean-downloaded-images
Add a command to clean downloaded images
This commit is contained in:
commit
1bf5419e8c
3 changed files with 131 additions and 25 deletions
|
@ -7,8 +7,7 @@ parameters:
|
||||||
symfony:
|
symfony:
|
||||||
container_xml_path: %rootDir%/../../../var/cache/test/appTestDebugProjectContainer.xml
|
container_xml_path: %rootDir%/../../../var/cache/test/appTestDebugProjectContainer.xml
|
||||||
|
|
||||||
# https://github.com/phpstan/phpstan/issues/694#issuecomment-350724288
|
bootstrapFiles:
|
||||||
autoload_files:
|
|
||||||
- vendor/bin/.phpunit/phpunit-8.3-0/vendor/autoload.php
|
- vendor/bin/.phpunit/phpunit-8.3-0/vendor/autoload.php
|
||||||
|
|
||||||
inferPrivatePropertyTypeFromConstructor: true
|
inferPrivatePropertyTypeFromConstructor: true
|
||||||
|
|
101
src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php
Normal file
101
src/Wallabag/CoreBundle/Command/CleanDownloadedImagesCommand.php
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Wallabag\CoreBundle\Command;
|
||||||
|
|
||||||
|
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
|
||||||
|
use Symfony\Component\Console\Input\InputInterface;
|
||||||
|
use Symfony\Component\Console\Input\InputOption;
|
||||||
|
use Symfony\Component\Console\Output\OutputInterface;
|
||||||
|
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||||
|
use Symfony\Component\Finder\Finder;
|
||||||
|
|
||||||
|
/**
 * Console command which removes downloaded image folders that are not attached to any entry.
 *
 * Image folders are looked up two levels below the download base folder, i.e. as
 * "<first hash char>/<second hash char>/<hash>", which is the layout rebuilt below
 * when computing the full path of a folder to remove.
 */
class CleanDownloadedImagesCommand extends ContainerAwareCommand
{
    /**
     * Declare the command name, its description and the "dry-run" option.
     */
    protected function configure()
    {
        $this
            ->setName('wallabag:clean-downloaded-images')
            ->setDescription('Cleans downloaded images which are no more associated to an entry')
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                'Do not remove images, just dump counters'
            );
    }

    /**
     * Compare the folders found on disk with the folders of known entries and
     * remove (or only count, in dry-run mode) the ones without a matching entry.
     *
     * @return int Always 0
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $io = new SymfonyStyle($input, $output);

        $dryRun = (bool) $input->getOption('dry-run');

        if ($dryRun) {
            $io->text('Dry run mode <info>enabled</info> (no images will be removed)');
        }

        $downloadImages = $this->getContainer()->get('wallabag_core.entry.download_images');
        $baseFolder = $downloadImages->getBaseFolder();

        $io->text('Retrieve existing images');

        // retrieve _existing_ folders in the image folder
        $finder = new Finder();
        $finder
            ->directories()
            ->ignoreDotFiles(true)
            ->depth(2)
            ->in($baseFolder);

        $existingPaths = [];
        foreach ($finder as $file) {
            $existingPaths[] = $file->getFilename();
        }

        $io->text(sprintf('  -> <info>%d</info> images found', \count($existingPaths)));

        $io->text('Retrieve valid folders attached to a user');

        $entries = $this->getContainer()->get('wallabag_core.entry_repository')->findAllEntriesIdByUserId();

        // retrieve _valid_ folders from existing entries
        $validPaths = [];
        foreach ($entries as $entry) {
            // Pass false so the lookup never creates the folder: otherwise an empty
            // folder would be created for every entry (even in dry-run mode) and the
            // existence check below could never fail.
            $path = $downloadImages->getRelativePath($entry['id'], false);

            if (!file_exists($baseFolder . '/' . $path)) {
                continue;
            }

            // only store the hash, not the full path
            $validPaths[] = explode('/', $path)[2];
        }

        $io->text(sprintf('  -> <info>%d</info> folders found', \count($validPaths)));

        $deletedCount = 0;

        $io->text('Remove images');

        // check if existing path are valid, if not, remove all images and the folder
        foreach ($existingPaths as $existingPath) {
            if (\in_array($existingPath, $validPaths, true)) {
                continue;
            }

            $fullPath = $baseFolder . '/' . $existingPath[0] . '/' . $existingPath[1] . '/' . $existingPath;

            // glob() returns false on error: treat it as "nothing to delete"
            // instead of handing false to array_map() and count()
            $files = glob($fullPath . '/*.*');
            if (false === $files) {
                $files = [];
            }

            if (!$dryRun) {
                array_map('unlink', $files);
                rmdir($fullPath);
            }

            $deletedCount += \count($files);

            $io->text(sprintf('Deleted images in <info>%s</info>: <info>%d</info>', $existingPath, \count($files)));
        }

        $io->success(sprintf('Finished cleaning. %d deleted images', $deletedCount));

        return 0;
    }
}
|
|
@ -37,6 +37,11 @@ class DownloadImages
|
||||||
$this->setFolder();
|
$this->setFolder();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Expose the base folder configured on this service.
 *
 * @return mixed Current value of the baseFolder property (used elsewhere in this
 *               class as the path prefix of the image folders)
 */
public function getBaseFolder()
{
    return $this->baseFolder;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process the html and extract images URLs from it.
|
* Process the html and extract images URLs from it.
|
||||||
*
|
*
|
||||||
|
@ -99,7 +104,7 @@ class DownloadImages
|
||||||
* @param string $url URL where the image was found
|
* @param string $url URL where the image was found
|
||||||
* @param string $relativePath Relative local path where the image is saved
|
* @param string $relativePath Relative local path where the image is saved
|
||||||
*
|
*
|
||||||
* @return string Relative url to access the image from the web
|
* @return string|false Relative url to access the image from the web
|
||||||
*/
|
*/
|
||||||
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
|
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
|
||||||
{
|
{
|
||||||
|
@ -210,6 +215,29 @@ class DownloadImages
|
||||||
@rmdir($folderPath);
|
@rmdir($folderPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the relative folder in which the images of an entry are saved.
 *
 * The folder is derived from the crc32 hash of the entry id and has the form
 * "<first hash char>/<second hash char>/<hash>", relative to the base folder.
 *
 * @param int  $entryId      ID of the entry
 * @param bool $createFolder Should we create the folder for the given id?
 *
 * @return string Folder path, relative to the base folder
 */
public function getRelativePath($entryId, $createFolder = true)
{
    $hash = hash('crc32', $entryId);
    $relative = implode('/', [$hash[0], $hash[1], $hash]);
    $absolute = $this->baseFolder . '/' . $relative;

    // only touch the filesystem when the caller asked for the folder to exist
    if ($createFolder && !file_exists($absolute)) {
        mkdir($absolute, 0777, true);
    }

    $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $absolute, 'entryId' => $entryId]);

    return $relative;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get images urls from the srcset image attribute.
|
* Get images urls from the srcset image attribute.
|
||||||
*
|
*
|
||||||
|
@ -254,28 +282,6 @@ class DownloadImages
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Build the relative folder in which the images of an entry are saved,
 * creating it on disk when it does not exist yet.
 *
 * The folder is derived from the crc32 hash of the entry id and has the form
 * "<first hash char>/<second hash char>/<hash>", relative to the base folder.
 *
 * @param int $entryId ID of the entry
 *
 * @return string Folder path, relative to the base folder
 */
private function getRelativePath($entryId)
{
    $hash = hash('crc32', $entryId);
    $relative = implode('/', [$hash[0], $hash[1], $hash]);
    $absolute = $this->baseFolder . '/' . $relative;

    $missing = !file_exists($absolute);
    if ($missing) {
        mkdir($absolute, 0777, true);
    }

    $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $absolute, 'entryId' => $entryId]);

    return $relative;
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Make an $url absolute based on the $base.
|
* Make an $url absolute based on the $base.
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in a new issue