Add: basic scraper that gets all URLs from a Wikipedia article
parent 30a1f8052c
commit 1f63795e49
1 changed file with 20 additions and 0 deletions
web-scraper/basic-scraper.go (Normal file, +20)
@@ -0,0 +1,20 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/gocolly/colly"
+)
+
+func main() {
+	c := colly.NewCollector(
+		colly.AllowedDomains("en.wikipedia.org"),
+	)
+
+	// Find and print all links
+	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
+		links := e.ChildAttrs("a", "href")
+		fmt.Println(links)
+	})
+	c.Visit("https://en.wikipedia.org/wiki/Web_scraping")
+}
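
The hrefs collected here are mostly relative paths (e.g. /wiki/Data_scraping), and the whole slice is printed as one blob. A minimal sketch of a possible follow-up, assuming the same collector setup as the commit above: it uses colly's Request.AbsoluteURL helper to resolve each href against the page URL and prints one full URL per line; the empty-string check for unresolvable hrefs is part of the sketch, not this commit.

package main

import (
	"fmt"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("en.wikipedia.org"),
	)

	// Print each link as an absolute URL, one per line.
	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
		for _, link := range e.ChildAttrs("a", "href") {
			// AbsoluteURL resolves a relative href against the page URL;
			// it returns "" for hrefs it cannot resolve (e.g. bare
			// fragments like "#cite_note-1"), which are skipped here.
			if u := e.Request.AbsoluteURL(link); u != "" {
				fmt.Println(u)
			}
		}
	})

	c.Visit("https://en.wikipedia.org/wiki/Web_scraping")
}

Run it with go run basic-scraper.go, assuming the colly dependency has already been fetched (go get github.com/gocolly/colly).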