Skip to content

Commit 9eeefda

Browse files
committed
php-cs-fix
1 parent 2ba6c47 commit 9eeefda

9 files changed

+486
-369
lines changed

Diff for: config.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
"DB_USER": "root",
55

6-
"DB_PASSWORD": "",
6+
"DB_PASSWORD": "7110$$$",
77

88
"DB_NAME": "localgoogoo"
99
}

Diff for: php/crawl.php

-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
if (isInvalid($name, $url)) {
3636
exit("\n$usageText".PHP_EOL." Invalid URL or Website name".PHP_EOL);
3737
}
38-
3938
} else {
4039
exit("This script is meant to be run from the command line");
4140
}

Diff for: php/crawler/crawler.class.php

+28-25
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class LGCrawler
3333

3434
/**
3535
* Crawler constructor
36-
*
36+
*
3737
* @param [string] $sitename website name
3838
* @param [string] $siteurl website url
3939
* @param [resource] $SQLConn mysql connection
@@ -52,7 +52,7 @@ public function __construct($sitename, $siteurl, $SQLConn)
5252

5353
/**
5454
* method to log messages
55-
*
55+
*
5656
* @param string $file the log file name
5757
* @param string $text the log message
5858
*/
@@ -126,7 +126,8 @@ private function runCrawler($url = null)
126126
// store all links in `$url`s content
127127
// for later crawling
128128
$this->getLinks(
129-
$url, function ($pageURL) use ($fileContent, &$links, &$crawledPages, $url) {
129+
$url,
130+
function ($pageURL) use ($fileContent, &$links, &$crawledPages, $url) {
130131

131132
// callback
132133
if (isset($this->onCrawlCallback[0])) {
@@ -141,8 +142,8 @@ private function runCrawler($url = null)
141142
if ($this->isAlike($this->siteurl, $pageURL) && !in_array($pageURL, $crawledPages)) {
142143
array_push($links, $pageURL);
143144
}
144-
145-
}, $fileContent /* page content provided so this method wont need to fetch the content again */
145+
},
146+
$fileContent /* page content provided so this method wont need to fetch the content again */
146147
);
147148

148149
// crawl all links in the `$links` array
@@ -155,10 +156,10 @@ private function runCrawler($url = null)
155156

156157
/**
157158
* convert relative url to absolute url
158-
*
159+
*
159160
* @param [string] $rel relative url
160161
* @param [string] $base base url
161-
*
162+
*
162163
* @return [string] absolute url
163164
*/
164165
private function rel2abs($rel, $base)
@@ -258,10 +259,10 @@ private function getLinks($u, $callback, $content = '')
258259

259260
/**
260261
* method to add pages to the db as we crawl
261-
*
262+
*
262263
* @param [string] $link page url
263264
* @param [string] $content page content
264-
*
265+
*
265266
* @return [boolean] page added or not
266267
*/
267268
private function addPageToDatabase($link, $content)
@@ -296,10 +297,10 @@ private function addPageToDatabase($link, $content)
296297
$dom = str_get_html($content);
297298

298299
// get <strong>, <b>, <em> tags from page
299-
$pageEmphasis = $this->getPageElems($dom, $content,"strong,em,b");
300+
$pageEmphasis = $this->getPageElems($dom, $content, "strong,em,b");
300301

301302
// get headers <h1>-<h6>
302-
$pageHeaders = $this->getPageElems($dom, $content,"h1,h2,h3,h4,h5,h6");
303+
$pageHeaders = $this->getPageElems($dom, $content, "h1,h2,h3,h4,h5,h6");
303304

304305
// strip out tags and remove useless html elements
305306
$content = $this->stripTags($content);
@@ -375,12 +376,13 @@ private function insertSiteInToDB()
375376
* @param [DOMObject] $dom html node object from 'simple_html_dom' lib
376377
* @param [Array[string]] $selectors selectors to be removed from dom
377378
*/
378-
private function removeElem($dom, $selectors) {
379+
private function removeElem($dom, $selectors)
380+
{
379381
foreach ($selectors as $selector) {
380-
$elems = $dom->find($selector);
381-
foreach ($elems as $E) {
382-
$E->innertext = "";
383-
}
382+
$elems = $dom->find($selector);
383+
foreach ($elems as $E) {
384+
$E->innertext = "";
385+
}
384386
}
385387
}
386388

@@ -392,20 +394,21 @@ private function removeElem($dom, $selectors) {
392394
*
393395
* @return [string] string containing tag content
394396
*/
395-
private function getPageElems($dom, $content, $tags) {
397+
private function getPageElems($dom, $content, $tags)
398+
{
396399
$headers = $dom->find($tags);
397400
$str = "";
398401
foreach ($headers as $h) {
399-
$str .= preg_replace("#&[a-z0-9]+;#i", "", $h->plaintext) . " ";
402+
$str .= preg_replace("#&[a-z0-9]+;#i", "", $h->plaintext) . " ";
400403
}
401404
return strip_tags($str);
402405
}
403406

404407
/**
405408
* strip out tags from html document
406-
*
409+
*
407410
* @param [string] $string HTML string
408-
*
411+
*
409412
* @return [string] HTML with tags stripped
410413
*/
411414
private function stripTags($string)
@@ -447,7 +450,7 @@ private function stripTags($string)
447450
* check if string is html
448451
*
449452
* @param [string] $string string to check
450-
*
453+
*
451454
* @return [boolean] html or not
452455
*/
453456
private function isHTML($string)
@@ -458,10 +461,10 @@ private function isHTML($string)
458461
/**
459462
* check if urls are alike
460463
* so as to prevent the crawler from exceeding its boundaries
461-
*
464+
*
462465
* @param [string] $url original url
463466
* @param [string] $testUrl test url
464-
*
467+
*
465468
* @return [boolean] alike or not
466469
*/
467470
private function isAlike($url, $testUrl)
@@ -492,7 +495,7 @@ private function isAlike($url, $testUrl)
492495

493496
/**
494497
* Takes a url and returns false (if it's inaccessible), otherwise its contents
495-
*
498+
*
496499
* @param [string] $url url to fetch
497500
*/
498501
private function getPageContent($url)
@@ -504,7 +507,7 @@ private function getPageContent($url)
504507
* delete multiple whitespaces
505508
*
506509
* @param [string] $str string to trim
507-
*
510+
*
508511
* @return [string] trimmed string
509512
*/
510513
private function _trim($str)

Diff for: php/inc/helpers.inc.php

-1
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,3 @@ function prepareConfigFile($config_file)
146146
file_put_contents($config_file, json_encode($data, JSON_PRETTY_PRINT));
147147
}
148148
}
149-

0 commit comments

Comments
 (0)