2015-10-15 18:06:59 +00:00
< ? php
namespace Wallabag\CoreBundle\Helper ;
2017-08-26 22:04:21 +00:00
use Html2Text\Html2Text ;
2015-12-22 09:16:34 +00:00
use JMS\Serializer\SerializationContext ;
use JMS\Serializer\SerializerBuilder ;
2015-10-15 18:06:59 +00:00
use PHPePub\Core\EPub ;
use PHPePub\Core\Structure\OPF\DublinCore ;
2015-10-16 08:51:53 +00:00
use Symfony\Component\HttpFoundation\Response ;
2017-10-08 06:55:30 +00:00
use Symfony\Component\Translation\TranslatorInterface ;
2017-07-29 20:51:50 +00:00
use Wallabag\CoreBundle\Entity\Entry ;
2015-10-15 18:06:59 +00:00
2015-10-30 19:57:10 +00:00
/**
 * Builds downloadable exports (EPUB, MOBI, PDF, CSV, JSON, XML, TXT) from one
 * or several entries.
 *
 * This class doesn't have unit tests BUT it's fully covered by a functional
 * test: ExportControllerTest.
 */
class EntriesExport
{
    /**
     * Characters that must never reach a quoted `filename="..."` header value:
     * control characters (incl. CR/LF), double quote, backslash and slash.
     */
    const UNSAFE_FILENAME_PATTERN = '/[\x00-\x1F\x7F"\\\\\/]/';

    /** Base name used when the sanitized title ends up empty. */
    const FALLBACK_FILENAME = 'wallabag';

    private $wallabagUrl;
    private $logoPath;
    private $translator;
    private $title = '';
    private $entries = [];
    private $author = 'wallabag';
    private $language = '';

    /**
     * @param TranslatorInterface $translator  Translator service
     * @param string              $wallabagUrl Wallabag instance url
     * @param string              $logoPath    Path to the logo FROM THE BUNDLE SCOPE
     */
    public function __construct(TranslatorInterface $translator, $wallabagUrl, $logoPath)
    {
        $this->translator = $translator;
        $this->wallabagUrl = $wallabagUrl;
        $this->logoPath = $logoPath;
    }

    /**
     * Define entries.
     *
     * When a single entry is given, its language becomes the export language;
     * for an array of entries the language is left untouched.
     *
     * @param array|Entry $entries An array of entries or one entry
     *
     * @return EntriesExport
     */
    public function setEntries($entries)
    {
        if (!\is_array($entries)) {
            $this->language = $entries->getLanguage();
            $entries = [$entries];
        }

        $this->entries = $entries;

        return $this;
    }

    /**
     * Sets the title from the category of which we want to get articles,
     * or from the entry itself when exporting just one entry.
     *
     * @param string $method Method to get articles ("entry" means a single entry)
     *
     * @return EntriesExport
     */
    public function updateTitle($method)
    {
        $this->title = $method . ' articles';

        if ('entry' === $method) {
            $this->title = $this->entries[0]->getTitle();
        }

        return $this;
    }

    /**
     * Sets the author for one entry or category.
     *
     * For a single entry the publishers are used, or the domain name if there
     * are none. For a category the author is "<method> authors".
     *
     * @param string $method Method to get articles
     *
     * @return EntriesExport
     */
    public function updateAuthor($method)
    {
        if ('entry' !== $method) {
            $this->author = $method . ' authors';

            return $this;
        }

        $this->author = $this->entries[0]->getDomainName();

        $publishedBy = $this->entries[0]->getPublishedBy();
        if (!empty($publishedBy)) {
            $this->author = implode(', ', $publishedBy);
        }

        return $this;
    }

    /**
     * Produces the export in the requested format by dispatching to the
     * matching private `produce<Format>()` method.
     *
     * @param string $format
     *
     * @throws \InvalidArgumentException When no producer exists for the format
     *
     * @return Response
     */
    public function exportAs($format)
    {
        $functionName = 'produce' . ucfirst($format);
        if (method_exists($this, $functionName)) {
            return $this->$functionName();
        }

        throw new \InvalidArgumentException(sprintf('The format "%s" is not yet supported.', $format));
    }

    /**
     * Returns the current entries serialized as a JSON string (no Response wrapper).
     *
     * @return string
     */
    public function exportJsonData()
    {
        return $this->prepareSerializingContent('json');
    }

    /**
     * Use PHPePub to dump a .epub file.
     *
     * @return Response
     */
    private function produceEpub()
    {
        /*
         * Start and End of the book
         */
        $bookStart =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            . "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\">\n"
            . '<head>'
            . "<meta http-equiv=\"Default-Style\" content=\"text/html; charset=utf-8\" />\n"
            . "<title>wallabag articles book</title>\n"
            . "</head>\n"
            . "<body>\n";

        $bookEnd = "</body>\n</html>\n";

        $book = new EPub(EPub::BOOK_VERSION_EPUB3);

        /*
         * Book metadata
         */
        $book->setTitle($this->title);
        // Language stays empty unless a single entry was given to setEntries().
        $book->setLanguage($this->language);
        $book->setDescription('Some articles saved on my wallabag');
        $book->setAuthor($this->author, $this->author);
        $book->setPublisher('wallabag', 'wallabag');
        $book->setDate(time());
        $book->setSourceURL($this->wallabagUrl);
        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'PHP');
        $book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, 'wallabag');

        /*
         * Front page
         */
        if (file_exists($this->logoPath)) {
            $book->setCoverImage('Cover.png', file_get_contents($this->logoPath), 'image/png');
        }

        $entryIds = [];
        $entryCount = \count($this->entries);
        $i = 0;

        /*
         * Adding actual entries
         */
        foreach ($this->entries as $entry) {
            ++$i;

            // set tags as subjects
            foreach ($entry->getTags() as $tag) {
                $book->setSubject($tag->getLabel());
            }

            // The position is part of the hash so that two entries sharing the
            // same title do not end up writing to the same chapter file.
            $filename = sha1(sprintf('%d:%s', $i, $entry->getTitle()));

            $titlepage = $bookStart . '<h1>' . $entry->getTitle() . '</h1>' . $this->getExportInformation('PHPePub') . $bookEnd;
            $book->addChapter("Entry {$i} of {$entryCount}", "{$filename}_cover.html", $titlepage, true, EPub::EXTERNAL_REF_ADD);

            $chapter = $bookStart . $entry->getContent() . $bookEnd;
            $entryIds[] = $entry->getId();
            $book->addChapter($entry->getTitle(), "{$filename}.html", $chapter, true, EPub::EXTERNAL_REF_ADD);
        }

        // Book identifier: derived from the instance url and the exported entry ids.
        $hash = sha1(sprintf('%s:%s', $this->wallabagUrl, implode(',', $entryIds)));
        $book->setIdentifier(sprintf('urn:wallabag:%s', $hash), EPub::IDENTIFIER_URI);

        return Response::create(
            $book->getBook(),
            200,
            [
                'Content-Description' => 'File Transfer',
                'Content-type' => 'application/epub+zip',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.epub"',
                'Content-Transfer-Encoding' => 'binary',
            ]
        );
    }

    /**
     * Use PHPMobi to dump a .mobi file.
     *
     * @return Response
     */
    private function produceMobi()
    {
        $mobi = new \MOBI();
        $content = new \MOBIFile();

        /*
         * Book metadata
         */
        $content->set('title', $this->title);
        $content->set('author', $this->author);
        $content->set('subject', $this->title);

        /*
         * Front page
         */
        $content->appendParagraph($this->getExportInformation('PHPMobi'));
        if (file_exists($this->logoPath)) {
            $content->appendImage(imagecreatefrompng($this->logoPath));
        }
        $content->appendPageBreak();

        /*
         * Adding actual entries
         */
        foreach ($this->entries as $entry) {
            $content->appendChapterTitle($entry->getTitle());
            $content->appendParagraph($entry->getContent());
            $content->appendPageBreak();
        }
        $mobi->setContentProvider($content);

        // The browser inside Kindle devices doesn't like special characters
        // either, so the file name is limited to A-z/0-9 and "-". The title
        // itself is left untouched; only the download name is stripped.
        $filename = $this->getSanitizedFilename('/[^A-Za-z0-9\-]/');

        return Response::create(
            $mobi->toString(),
            200,
            [
                'Accept-Ranges' => 'bytes',
                'Content-Description' => 'File Transfer',
                'Content-type' => 'application/x-mobipocket-ebook',
                'Content-Disposition' => 'attachment; filename="' . $filename . '.mobi"',
                'Content-Transfer-Encoding' => 'binary',
            ]
        );
    }

    /**
     * Use TCPDF to dump a .pdf file.
     *
     * @return Response
     */
    private function producePdf()
    {
        $pdf = new \TCPDF(PDF_PAGE_ORIENTATION, PDF_UNIT, PDF_PAGE_FORMAT, true, 'UTF-8', false);

        /*
         * Book metadata
         */
        $pdf->SetCreator(PDF_CREATOR);
        $pdf->SetAuthor($this->author);
        $pdf->SetTitle($this->title);
        $pdf->SetSubject('Articles via wallabag');

        // Collected across all entries and set once after the loop: calling
        // SetKeywords() per tag would replace the previous value each time.
        $keywords = ['wallabag'];

        /*
         * Front page
         */
        $pdf->AddPage();
        $intro = '<h1>' . $this->title . '</h1>' . $this->getExportInformation('tcpdf');

        $pdf->writeHTMLCell(0, 0, '', '', $intro, 0, 1, 0, true, '', true);

        /*
         * Adding actual entries
         */
        foreach ($this->entries as $entry) {
            foreach ($entry->getTags() as $tag) {
                $keywords[] = $tag->getLabel();
            }

            $pdf->AddPage();
            $html = '<h1>' . $entry->getTitle() . '</h1>';
            $html .= $entry->getContent();

            $pdf->writeHTMLCell(0, 0, '', '', $html, 0, 1, 0, true, '', true);
        }

        $pdf->SetKeywords(implode(', ', array_unique($keywords)));

        // set image scale factor
        $pdf->setImageScale(PDF_IMAGE_SCALE_RATIO);

        return Response::create(
            $pdf->Output('', 'S'),
            200,
            [
                'Content-Description' => 'File Transfer',
                'Content-type' => 'application/pdf',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.pdf"',
                'Content-Transfer-Encoding' => 'binary',
            ]
        );
    }

    /**
     * Dump a CSV file (";" delimited). Inspired from CsvFileDumper.
     *
     * @return Response
     */
    private function produceCsv()
    {
        $delimiter = ';';
        $enclosure = '"';
        $handle = fopen('php://memory', 'b+r');

        fputcsv($handle, ['Title', 'URL', 'Content', 'Tags', 'MIME Type', 'Language', 'Creation date'], $delimiter, $enclosure);

        foreach ($this->entries as $entry) {
            fputcsv(
                $handle,
                [
                    $entry->getTitle(),
                    $entry->getURL(),
                    // remove new line to avoid crazy results
                    str_replace(["\r\n", "\r", "\n"], '', $entry->getContent()),
                    implode(', ', $entry->getTags()->toArray()),
                    $entry->getMimetype(),
                    $entry->getLanguage(),
                    // NOTE(review): "h" is a 12-hour clock without am/pm marker;
                    // kept as-is because consumers may depend on this format.
                    $entry->getCreatedAt()->format('d/m/Y h:i:s'),
                ],
                $delimiter,
                $enclosure
            );
        }

        rewind($handle);
        $output = stream_get_contents($handle);
        fclose($handle);

        return Response::create(
            $output,
            200,
            [
                'Content-type' => 'application/csv',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.csv"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Dump a JSON file.
     *
     * @return Response
     */
    private function produceJson()
    {
        return Response::create(
            $this->prepareSerializingContent('json'),
            200,
            [
                'Content-type' => 'application/json',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.json"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Dump a XML file.
     *
     * @return Response
     */
    private function produceXml()
    {
        return Response::create(
            $this->prepareSerializingContent('xml'),
            200,
            [
                'Content-type' => 'application/xml',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.xml"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Dump a TXT file: each entry is rendered as a title framed by two
     * 100-character "=" bars, followed by its content converted to plain text.
     *
     * @return Response
     */
    private function produceTxt()
    {
        $content = '';
        $bar = str_repeat('=', 100);

        foreach ($this->entries as $entry) {
            $content .= "\n\n" . $bar . "\n\n" . $entry->getTitle() . "\n\n" . $bar . "\n\n";
            $html = new Html2Text($entry->getContent(), ['do_links' => 'none', 'width' => 100]);
            $content .= $html->getText();
        }

        return Response::create(
            $content,
            200,
            [
                'Content-type' => 'text/plain',
                'Content-Disposition' => 'attachment; filename="' . $this->getSanitizedFilename() . '.txt"',
                'Content-Transfer-Encoding' => 'UTF-8',
            ]
        );
    }

    /**
     * Serialize the entries (with the "entries_for_user" group) for the
     * producing processes that need it (JSON & XML).
     *
     * @param string $format Serializer format ("json" or "xml")
     *
     * @return string
     */
    private function prepareSerializingContent($format)
    {
        $serializer = SerializerBuilder::create()->build();

        return $serializer->serialize(
            $this->entries,
            $format,
            SerializationContext::create()->setGroups(['entries_for_user'])
        );
    }

    /**
     * Return a kind of footer / information for the export.
     *
     * The translated template may contain an %IMAGE% placeholder: it is
     * replaced by the logo <img> tag for tcpdf and removed for other generators.
     *
     * @param string $type Generator of the export, can be: tcpdf, PHPePub, PHPMobi
     *
     * @return string
     */
    private function getExportInformation($type)
    {
        $info = $this->translator->trans('export.footer_template', [
            '%method%' => $type,
        ]);

        if ('tcpdf' === $type) {
            return str_replace('%IMAGE%', '<img src="' . $this->logoPath . '" />', $info);
        }

        return str_replace('%IMAGE%', '', $info);
    }

    /**
     * Return the title made safe for use inside a quoted Content-Disposition
     * `filename="..."` value.
     *
     * Titles may come from saved web pages, so anything able to terminate the
     * quoted string or the header line itself is stripped.
     *
     * @param string $stripPattern PCRE pattern matching the characters to remove
     *
     * @return string Sanitized base name (without extension), never empty
     */
    private function getSanitizedFilename($stripPattern = self::UNSAFE_FILENAME_PATTERN)
    {
        $basename = preg_replace($stripPattern, '', (string) $this->title);

        // preg_replace() yields null on failure; an empty name would produce
        // a download called just ".ext".
        if (null === $basename || '' === $basename) {
            return self::FALLBACK_FILENAME;
        }

        return $basename;
    }
}