Add: basic scraper that gets all URLs from a Wikipedia article
parent 30a1f8052c
commit 1f63795e49
1 changed file with 20 additions and 0 deletions
web-scraper/basic-scraper.go (new file, +20)
@@ -0,0 +1,20 @@
package main

import (
	"fmt"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("en.wikipedia.org"),
	)

	// Find and print all links
	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
		links := e.ChildAttrs("a", "href")
		fmt.Println(links)
	})
	c.Visit("https://en.wikipedia.org/wiki/Web_scraping")
}
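
Usage note (not part of this commit): after `go get github.com/gocolly/colly`, `go run basic-scraper.go` prints one slice containing every href in the article body, most of them relative paths like /wiki/Data_scraping. The sketch below is one possible extension, not the committed code: it resolves each href to an absolute URL via e.Request.AbsoluteURL and checks the error returned by Visit. Only the selector root and start page come from the committed file; the per-link selector ".mw-parser-output a[href]" and the error handling are assumptions about how one might extend it.

package main

import (
	"fmt"
	"log"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.AllowedDomains("en.wikipedia.org"),
	)

	// Fire once per link instead of once per article body, and
	// resolve relative hrefs against the page URL so they print
	// as full URLs. (Extension sketch, not the committed code.)
	c.OnHTML(".mw-parser-output a[href]", func(e *colly.HTMLElement) {
		fmt.Println(e.Request.AbsoluteURL(e.Attr("href")))
	})

	// Visit returns an error (e.g. a network failure or a URL
	// outside AllowedDomains); the committed version ignores it.
	if err := c.Visit("https://en.wikipedia.org/wiki/Web_scraping"); err != nil {
		log.Fatal(err)
	}
}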