Add: basic scraper that gets all URLs from a Wikipedia article
parent 30a1f8052c
commit 1f63795e49
1 changed file with 20 additions and 0 deletions
web-scraper/basic-scraper.go (new file, +20)
@@ -0,0 +1,20 @@
package main

import (
	"fmt"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("en.wikipedia.org"),
	)

	// Find and print all links
	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
		links := e.ChildAttrs("a", "href")
		fmt.Println(links)
	})
	c.Visit("https://en.wikipedia.org/wiki/Web_scraping")
}
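
Usage note (not part of this commit): after `go get github.com/gocolly/colly`, `go run basic-scraper.go` prints one slice containing every href in the article body, most of them relative paths like /wiki/Data_scraping. The sketch below is one possible extension, not the committed code: it resolves each href to an absolute URL via e.Request.AbsoluteURL and checks the error returned by Visit. Only the selector root and start page come from the committed file; the per-link selector ".mw-parser-output a[href]" and the error handling are assumptions about how one might extend it.

package main

import (
	"fmt"
	"log"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("en.wikipedia.org"),
	)

	// Fire once per link instead of once per article body, and
	// resolve relative hrefs against the page URL so they print
	// as full URLs. (Extension sketch, not the committed code.)
	c.OnHTML(".mw-parser-output a[href]", func(e *colly.HTMLElement) {
		fmt.Println(e.Request.AbsoluteURL(e.Attr("href")))
	})

	// Visit returns an error (e.g. a network failure or a URL
	// outside AllowedDomains); the committed version ignores it.
	if err := c.Visit("https://en.wikipedia.org/wiki/Web_scraping"); err != nil {
		log.Fatal(err)
	}
}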