ArachNode
+ New Spider
Save
Run
import re

import scrapy


def clean_text(s: str) -> str:
    """Return *s* with whitespace normalized.

    Non-breaking spaces are turned into ordinary spaces, every run of
    whitespace is collapsed to a single space, and the result is stripped.
    A falsy input (``None`` or ``""``) yields ``""``.
    """
    return re.sub(r"\s+", " ", (s or "").replace("\xa0", " ")).strip()


def clean_number(s: str) -> str:
    """Return the numeric characters of *s* as a string.

    Keeps only digits, dot, comma and minus, then drops commas (treated as
    thousands separators). The result is NOT guaranteed to be a valid
    number -- e.g. a lone "-" survives -- so callers must convert it
    defensively.
    """
    s = clean_text(s)
    s = re.sub(r"[^0-9,\.\-]", "", s)
    return s.replace(",", "")


def _to_float(s):
    """Convert a cleaned numeric string to ``float``; ``None`` if empty or invalid."""
    if not s:
        return None
    try:
        return float(s)
    except ValueError:
        # Residue such as "-" or "1.2.3" passes clean_number() but is not a
        # number; treat it as missing instead of aborting the whole parse.
        return None


def _cell_text(cell):
    """Join all text nodes found under a table-cell selector with spaces."""
    return " ".join(cell.css("::text, *::text").getall())


class MySpider(scrapy.Spider):
    """Scrape the exchange-rate table found inside ``#baTab0``.

    Yields one dict per data row with the keys ``source_url``, ``date``,
    ``currency_pair``, ``bid``, ``ask`` and ``avg``.
    """

    name = "rbz_rates"
    allowed_domains = ["rbz.co.zw"]

    # Use whichever page actually contains the "baTab0" HTML.
    # If you know the exact page, replace this with it.
    start_urls = ["https://www.rbz.co.zw/"]

    custom_settings = {
        "ROBOTSTXT_OBEY": True,
        "USER_AGENT": "Mozilla/5.0 (compatible; rbz-rates-scraper/1.0)",
    }

    def parse(self, response):
        """Extract rate rows from the table inside ``<div id="baTab0">``.

        Logs a warning and yields nothing when ``#baTab0`` is absent.
        Numeric cells that are empty or unparseable become ``None``; rows
        where bid, ask and avg are all missing are skipped.
        """
        tab = response.css("#baTab0")
        if not tab:
            self.logger.warning("Could not find #baTab0 on %s", response.url)
            return

        # Do not require <tbody>: browsers insert it into the DOM, but the
        # raw HTML Scrapy parses may not contain it. Header rows are still
        # excluded below by the cell-count and label filters.
        rows = tab.css("table tr")

        # Attempt to grab the date from a header line like:
        #   "EXCHANGE RATES 27-02-2026"
        # The value is kept as the matched string (presumably DD-MM-YYYY --
        # confirm against the page before parsing it into a date).
        header_text = clean_text(
            " ".join(tab.css("strong *::text, strong::text").getall())
        )
        m = re.search(
            r"EXCHANGE\s+RATES\s+(\d{2}-\d{2}-\d{4})", header_text, flags=re.I
        )
        rates_date = m.group(1) if m else None

        # Data rows look like: CURRENCY | BID | ASK | AVG
        for tr in rows:
            tds = tr.css("td")
            if len(tds) < 4:
                continue

            currency = clean_text(_cell_text(tds[0]))

            # Skip header-ish rows
            if not currency or currency.upper() in {"CURRENCY", "INTERBANK RATES"}:
                continue

            bid = _to_float(clean_number(_cell_text(tds[1])))
            ask = _to_float(clean_number(_cell_text(tds[2])))
            avg = _to_float(clean_number(_cell_text(tds[3])))

            # Skip rows with no usable numbers. Compare against None so a
            # genuine 0.0 rate is not mistaken for "missing".
            if bid is None and ask is None and avg is None:
                continue

            yield {
                "source_url": response.url,
                "date": rates_date,
                "currency_pair": currency,
                "bid": bid,
                "ask": ask,
                "avg": avg,
            }
Schedule
Runs