Google Answers Logo
View Question
 
Q: Two VBScript Functions Needed ( Answered 5 out of 5 stars,   0 Comments )
Question  
Subject: Two VBScript Functions Needed
Category: Computers > Programming
Asked by: rehabguy-ga
List Price: $5.00
Posted: 21 Oct 2003 16:55 PDT
Expires: 20 Nov 2003 15:55 PST
Question ID: 268404
Using a raw HTML string from an HTTP call:

Function 1: Find the first 10 links in the raw HTML and extract them
in a fully qualified http format:

Ex: <a href="folder/index.htm">Link</a>
Becomes: "http://www.domain.com/folder/index.htm"

Function 2: Remove all of the HTML tags, style sheets, script tags,
etc, from the HTML, leaving only the raw text.

Let me know if you need clarification!
Thanks
Answer  
Subject: Re: Two VBScript Functions Needed
Answered By: dewolfe001-ga on 04 Nov 2003 16:47 PST
Rated:5 out of 5 stars
 
This should work. It requires the ASPhttp component and line 97 needs
to be changed to whatever URL you are pointing at.

<%

Function stripHTML(strHTML)
	'Strips the HTML tags from strHTML

	Dim objRegExp, strOutput
	Set objRegExp = New Regexp

	objRegExp.IgnoreCase = True
	objRegExp.Global = True
	objRegExp.Pattern = "<(.|\n)+?>"

	'Replace all HTML tag matches with the empty string
	strOutput = objRegExp.Replace(strHTML, "")

	'Replace all < and > with &lt; and &gt;
	strOutput = Replace(strOutput, "<", "&lt;")
	strOutput = Replace(strOutput, ">", "&gt;")

	stripHTML = strOutput    'Return the value of strOutput

	Set objRegExp = Nothing
End Function

Function stripCSSTags(strCSS)
	x = 1
	SOn = 1
	strOutput = ""
	Do While x < Len(strCSS) 
		If Mid(strCSS,x,6) = "<style" Then
			SOn = 0
			x = x + 1
		End If		
		If SOn = 1 Then
			strOutput = strOutput & Mid(strCSS,x,1)
		Else
			' Response.Write(Mid(strCSS,x,6) & Chr(13))
		End If
		If Mid(strCSS,x,8) = "</style>" Then
			SOn = 1
			x = x + 1
		End If
		x = x + 1
	Loop
	stripCSSTags = strOutput
End Function

Function stripScriptTags(strScript)
	x = 1
	SOn = 1
	strOutput = ""
	Do While x < Len(strScript) 
		If Mid(strScript,x,7) = "<script" Then
			SOn = 0
			x = x + 1
		End If		
		If SOn = 1 Then
			strOutput = strOutput & Mid(strScript,x,1)
		Else
			' Response.Write(Mid(strScript,x,6) & Chr(13))
		End If
		If Mid(strScript,x,9) = "</script>" Then
			SOn = 1
			x = x + 1
		End If
		x = x + 1
	Loop
	stripScriptTags = strOutput
End Function

Function stripOutLink(linos)
	If IsNull(linos) Or Len(linos) < 3 Then 
			stripOutLink = FALSE		
	Else
		Dim funcObjRegExp
		Set funcObjRegExp = New Regexp
		funcObjRegExp.IgnoreCase = True
		funcObjRegExp.Global = True
		funcObjRegExp.Pattern = "<a href\="

		aref = "<a href="""

		If IsEmpty(linos) OR IsNull(linos) Then
			stripOutLink = FALSE
		End If
		If (funcObjRegExp.Test(linos)) Then
			lpos = InStr(LCase(linos),aref)
			rpos = InStr(LCase(linos),""">")
			stripOutLink = Mid(linos,lpos+9,(rpos-(lpos+9)))
		Else
			stripOutLink = FALSE
		End If
	End If
End Function

Set HttpObj = Server.CreateObject("AspHTTP.Conn")
HTTPObj.Url = "http://mike.dewolfe.bc.ca/index.asp"

myURLfile = HTTPObj.GetURL
For x = 0 to 12 
	myURLfile = Replace(myURLfile,Chr(x),"")
Next

Dim URLines
Dim TenLinks(10)
Ten = 0
Lines = 0


If InStr(myURLfile,Chr(13)) > 1 Then
	URLines = split(myURLfile,Chr(13))
Else
	URLines = myURLfile
End If

Do While Ten < 10 And UBound(URLines) > Lines
	If stripOutLink(URLines(Lines)) <> FALSE Then
		If Left(stripOutLink(URLines(Lines)),4) = "http" Then
			TenLinks(Ten) = stripOutLink(URLines(Lines))			
		Else
			TenLinks(Ten) = "http://www.domain.com/" & stripOutLink(URLines(Lines))
		End If
		Ten = Ten + 1
	End If
	Lines = Lines + 1
Loop

OnlyText = stripCSSTags(myURLfile)
OnlyText = stripScriptTags(OnlyText)
OnlyText = stripHTML(OnlyText)


OnlyText = Replace(OnlyText,"&nbsp;","")
OnlyText = Replace(OnlyText,"&nbsp","")
OnlyText = Replace(OnlyText," " & Chr(13),Chr(13))
OnlyText = Replace(OnlyText,Chr(13) & Chr(13),"")
%>
<% For x = 0 to 9 %>
	<% =TenLinks(x) %><br>
<% Next %>
<br>
<% =OnlyText %>
rehabguy-ga rated this answer:5 out of 5 stars and gave an additional tip of: $5.00
Perfect! Nice work on this.

Comments  
There are no comments at this time.

Important Disclaimer: Answers and comments provided on Google Answers are general information, and are not intended to substitute for informed professional medical, psychiatric, psychological, tax, legal, investment, accounting, or other professional advice. Google does not endorse, and expressly disclaims liability for any product, manufacturer, distributor, service or service provider mentioned or any opinion expressed in answers or comments. Please read carefully the Google Answers Terms of Service.

If you feel that you have found inappropriate content, please let us know by emailing us at answers-support@google.com with the question ID listed above. Thank you.
Search Google Answers for
Google Answers  


Google Home - Answers FAQ - Terms of Service - Privacy Policy