Add: basic scraper that gets all URLs from a Wikipedia article
parent 30a1f8052c
commit 1f63795e49
1 changed file with 20 additions and 0 deletions
web-scraper/basic-scraper.go (Normal file, +20)
@@ -0,0 +1,20 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/gocolly/colly"
+)
+
+func main() {
+	c := colly.NewCollector(
+		colly.AllowedDomains("en.wikipedia.org"),
+	)
+
+	// Find and print all links
+	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
+		links := e.ChildAttrs("a", "href")
+		fmt.Println(links)
+	})
+	c.Visit("https://en.wikipedia.org/wiki/Web_scraping")
+}
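
The hrefs collected here are mostly relative paths (e.g. /wiki/Data_scraping), and the whole slice is printed as one blob. A minimal sketch of a possible follow-up, assuming the same collector setup as the commit above: it uses colly's Request.AbsoluteURL helper to resolve each href against the page URL and prints one full URL per line; the empty-string check for unresolvable hrefs is part of the sketch, not this commit.

package main

import (
	"fmt"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("en.wikipedia.org"),
	)

	// Print each link as an absolute URL, one per line.
	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
		for _, link := range e.ChildAttrs("a", "href") {
			// AbsoluteURL resolves a relative href against the page URL;
			// it returns "" for hrefs it cannot resolve (e.g. bare
			// fragments like "#cite_note-1"), which are skipped here.
			if u := e.Request.AbsoluteURL(link); u != "" {
				fmt.Println(u)
			}
		}
	})

	c.Visit("https://en.wikipedia.org/wiki/Web_scraping")
}

Run it with go run basic-scraper.go, assuming the colly dependency has already been fetched (go get github.com/gocolly/colly).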