From 1f63795e49ab8a9a97189ead44df5d4e3520e09a Mon Sep 17 00:00:00 2001
From: Aroy-Art
Date: Fri, 12 Jul 2024 21:48:47 +0200
Subject: [PATCH] Add: basic scraper that gets all urls from a wikipedia article

---
 web-scraper/basic-scraper.go | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 web-scraper/basic-scraper.go

diff --git a/web-scraper/basic-scraper.go b/web-scraper/basic-scraper.go
new file mode 100644
index 0000000..7e7ef4c
--- /dev/null
+++ b/web-scraper/basic-scraper.go
@@ -0,0 +1,34 @@
+// Command basic-scraper prints the href attribute values of all <a> elements
+// found inside each .mw-parser-output element of the English Wikipedia article
+// on web scraping.
+package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/gocolly/colly"
+)
+
+// main builds a collector restricted to en.wikipedia.org, registers a handler
+// that prints the links of each matching element, and visits the article. It
+// exits with status 1 if the visit returns an error.
+func main() {
+	c := colly.NewCollector(
+		colly.AllowedDomains("en.wikipedia.org"),
+	)
+
+	// Find and print all links
+	c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
+		links := e.ChildAttrs("a", "href")
+		fmt.Println(links)
+	})
+
+	// Visit reports failures through its return value; surface them instead of
+	// exiting silently with status 0.
+	const target = "https://en.wikipedia.org/wiki/Web_scraping"
+	if err := c.Visit(target); err != nil {
+		fmt.Fprintf(os.Stderr, "visiting %s: %v\n", target, err)
+		os.Exit(1)
+	}
+}