Information Technology - Computer Programming - Source Code - Homebrew - Open Source - Software - Hardware - 8 bit - 16 bit - 32 bit - 64 bit - x86 - x64 - DOS - Windows - Linux - Arduino - Embedded - Development - Retro - Vintage - Math - Science - History - Hobby - Beginners - Professionals - Experiment - Research - Study - Fun - Games

ATTENTION NEW USERS: Due to bot traffic, we have to approve registrations manually. Thousands of bots try to register, so we delete registrations in bulk and have little ability to tell which ones are real. If you're having trouble getting approved, send an email to ptrworkmails@gmail.com explaining that you are a real user, using the same email address you are trying to register with. Thank you.

HTM Parser

Share your Express BASIC creations here.
Post Reply
admin
Site Admin
Posts: 142
Joined: Wed Feb 22, 2023 6:51 am

HTM Parser

Post by admin »

HTM Parser

Converts HTM/HTML files to plain text.

Code: Select all

1 REM HTM Parser by Gemino Smothers
2 REM Written in Express BASIC
3 REM Entry point: asks for an input file name, converts that file with
4 REM the subroutine at line 1000, then ends the program.
5 REM lf$ holds the line-feed character the parser emits as a line break.
10 lf$ = CHR$(10)
20 INPUT "*.htm: ", htmfile$
30 GOSUB 1000
40 END
990 REM Subroutine 1000: convert the file named in htmfile$.
991 REM Reads the input one record at a time into rawhtm$, calls the tag
992 REM dispatcher (line 2000) for each character position, then prints the
993 REM converted record to the screen and writes it to "parsed.txt".
994 REM Handlers reached from line 2000 may move chars forward themselves
995 REM to jump over a whole tag, so the FOR loop can skip positions.
1000 PRINT "Parsing: "; htmfile$
1010 OPEN "I", #1, htmfile$
1020 OPEN "O", #2, "parsed.txt"
1030 IF EOF(1) THEN 1080
1040 returnbuffer$ = ""
1042 INPUT #1, rawhtm$
1050 FOR chars = 1 TO LEN(rawhtm$)
1052 GOSUB 2000
1054 NEXT chars
1060 PRINT returnbuffer$
1062 PRINT #2, returnbuffer$
1070 GOTO 1030
1080 CLOSE #1
1082 CLOSE #2
1090 PRINT "Done."
1092 RETURN
1990 REM Subroutine 2000: classify the text that starts at position chars
1991 REM of rawhtm$ and call the matching handler. The comparison is done on
1992 REM upper-cased prefixes; anything that matches no known prefix goes to
1993 REM the generic character handler at line 9000.
1994 REM SCRIPT/STYLE are matched on longer prefixes than the other tags so
1995 REM that <STRONG>, <STRIKE>, </SPAN>, </STRONG> and similar tags cannot
1996 REM switch text output off or on by accident.
1997 REM Nothing is printed here: screen output happens once per input
1998 REM record in the caller, not once per character.
2000 tagchars$ = UCASE$(MID$(rawhtm$, chars, 3))
2005 tag4$ = UCASE$(MID$(rawhtm$, chars, 4)): tag5$ = UCASE$(MID$(rawhtm$, chars, 5))
2010 IF tagchars$ = "<BO" THEN GOSUB 3000: GOTO 2100
2020 IF tagchars$ = "<BR" OR tagchars$ = "<HR" OR tagchars$ = "<P>" OR tagchars$ = "</P" THEN GOSUB 4000: GOTO 2100
2030 IF tagchars$ = "<DI" OR tagchars$ = "</D" OR tagchars$ = "<TA" OR tagchars$ = "</T" THEN GOSUB 4000: GOTO 2100
2040 IF tagchars$ = "<TR" OR tagchars$ = "<TD" OR tagchars$ = "<TH" THEN GOSUB 4000: GOTO 2100
2050 IF tag4$ = "<SCR" OR tag4$ = "<STY" THEN GOSUB 5000: GOTO 2100
2060 IF tag5$ = "</SCR" OR tag5$ = "</STY" THEN GOSUB 6000: GOTO 2100
2070 IF tagchars$ = "<A " THEN GOSUB 7000: GOTO 2100
2080 IF tagchars$ = "&NB" THEN GOSUB 8000: GOTO 2100
2090 GOSUB 9000
2100 RETURN
2990 REM Tag handlers 3000-6000. They maintain the flag body: ordinary text
2991 REM (line 9000) and line breaks (line 4000) are only emitted while body
2992 REM is non-zero.
2995 REM 3000: body tag seen - enable text output, skip to the end of the tag.
3000 body = 1: GOSUB 10000: RETURN
3990 REM 4000: block-level tag - when inside the body, emit a line break
3991 REM and skip to the end of the tag; otherwise do nothing.
4000 IF body THEN returnbuffer$ = returnbuffer$ + lf$: GOSUB 10000
4010 RETURN
4990 REM 5000: start of a section whose content must not be emitted.
4991 REM The body state in force before the section is remembered once
4992 REM (skipping guards against overwriting it on a repeated call) so
4993 REM that 6000 can restore it instead of always switching output on.
5000 IF skipping = 0 THEN savedbody = body: skipping = 1
5010 body = 0: RETURN
5990 REM 6000: end of a skipped section - restore the remembered body state
5991 REM (so a section closed before the body tag leaves output off), then
5992 REM skip to the end of the closing tag. When no section is open the
5993 REM body state is left as it is.
6000 IF skipping THEN body = savedbody: skipping = 0
6010 GOSUB 10000: RETURN
6990 REM Subroutine 7000: handle an anchor tag that starts at position chars.
6991 REM Appends a line break and "LINK: (", copies every character found
6992 REM between double quotes inside the tag, then appends ") ".
6993 REM The scan stops at the first ">" or at the last character of rawhtm$,
6994 REM and chars is left at that position for the caller's loop.
6995 REM quote is 1 while the scan is inside a quoted attribute value.
6996 REM An opening quote only sets the flag; the character after it is
6997 REM examined on the next pass of the loop, so an empty value (two
6998 REM quotes in a row) closes again instead of being copied as text.
7000 returnbuffer$ = returnbuffer$ + lf$ + "LINK: (": quote = 0
7010 IF MID$(rawhtm$, chars, 1) <> CHR$(34) THEN 7040
7020 IF quote = 0 THEN quote = 1: GOTO 7050
7030 quote = 0
7040 IF quote THEN returnbuffer$ = returnbuffer$ + MID$(rawhtm$, chars, 1)
7050 chars = chars + 1
7060 IF MID$(rawhtm$, chars, 1) <> ">" AND chars < LEN(rawhtm$) THEN 7010
7070 returnbuffer$ = returnbuffer$ + ") ": RETURN
7990 REM Subroutine 8000: handle an entity whose text starts with "&NB".
7991 REM Emits one space in its place, then asks the scanner at line 11000
7992 REM to move chars onto the next ";" in rawhtm$ (chars stays put when
7993 REM no ";" follows).
8000 returnbuffer$ = returnbuffer$ + " "
8010 targetchar$ = ";"
8020 GOSUB 11000
8030 RETURN
8990 REM Subroutine 9000: handle a character that matched no known prefix.
8991 REM Does nothing while body is 0. A "<" is passed to line 12000 together
8992 REM with the character that follows it (in targetchar$); every other
8993 REM character is passed to line 13000 in targetchar$.
9000 IF body = 0 THEN RETURN
9010 IF MID$(rawhtm$, chars, 1) <> "<" THEN 9030
9020 targetchar$ = MID$(rawhtm$, chars + 1, 1): GOSUB 12000: RETURN
9030 targetchar$ = MID$(rawhtm$, chars, 1): GOSUB 13000
9040 RETURN
9990 REM Subroutine 10000: move chars to the next ">" in rawhtm$, if any.
10000 targetchar$ = ">"
10010 GOSUB 11000
10020 RETURN
10990 REM Subroutine 11000: search rawhtm$ for targetchar$, starting one
10991 REM position after chars. When it is found, chars is set to its
10992 REM position; when it is not found, chars is left unchanged.
10993 REM checkchars is the scan position and is never moved past the
10994 REM end of rawhtm$ once it is inside the string.
11000 checkchars = chars + 1
11010 IF MID$(rawhtm$, checkchars, 1) = targetchar$ THEN chars = checkchars: RETURN
11020 IF checkchars < LEN(rawhtm$) THEN checkchars = checkchars + 1: GOTO 11010
11030 RETURN
11990 REM Subroutine 12000: decide whether a "<" at position chars starts a
11991 REM tag or is literal text. targetchar$ holds the character after it.
11992 REM When that character is a digit (codes 48-57), a space (32), a
11993 REM comma (44) or another "<" (60), the "<" is copied to returnbuffer$.
11994 REM Otherwise it is treated as a tag and chars is moved to the next ">".
11995 REM achar is reset on every call: when "<" is the last character of
11996 REM the record targetchar$ is empty, and the decision must not depend
11997 REM on the character code left over from an earlier call.
12000 achar = 0: IF LEN(targetchar$) THEN achar = ASC(targetchar$)
12010 IF (achar < 48 OR achar > 57) AND achar <> 32 AND achar <> 44 AND achar <> 60 THEN GOSUB 10000: RETURN
12020 returnbuffer$ = returnbuffer$ + MID$(rawhtm$, chars, 1): RETURN
12990 REM Subroutine 13000: append targetchar$ to returnbuffer$ unless it is
12991 REM a control character with code 9, 10, 11 or 13 (tab, line feed,
12992 REM vertical tab, carriage return), which is dropped.
13000 fachar = ASC(targetchar$)
13010 IF fachar = 9 OR fachar = 10 OR fachar = 11 OR fachar = 13 THEN RETURN
13020 returnbuffer$ = returnbuffer$ + targetchar$
13030 RETURN
Post Reply