Hi, I've written a web crawler: given a page URL it writes every link found on that page to a file, and then every link found on each of the pages listed in that file. I hope that's clear. The problem is that writing the links from the start page takes about 3 seconds (there are roughly 300 of them), but on the last run the links from those 300 pages took 12 minutes (there were 10 000 of them). Can this be made faster somehow? If I start going deeper, I won't live long enough to see it finish.
import java.io.*;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;

import org.htmlparser.beans.LinkBean;

public class LinkExtract {

    // A HashSet makes the duplicate check O(1); with the previous ArrayList,
    // contains() is a linear scan, which alone gets painful at 10 000+ links
    private final Set<String> result = new HashSet<String>();
    private String urlString;
    private int countLinks = 0;

    public LinkExtract() throws Exception {
        System.out.print("Enter the page URL: ");
        BufferedReader buff = new BufferedReader(new InputStreamReader(System.in));
        urlString = buff.readLine();
        readLinksFromPage();
        readLinksFromFile("links.xml");
        writePageToFile();
        countWords();
    }

    public static void main(String[] args) throws Exception {
        new LinkExtract();
    }

    // Fetches the start page and writes every previously unseen link to links.xml
    private void readLinksFromPage() throws Exception {
        BufferedWriter out = new BufferedWriter(new FileWriter("links.xml"));
        LinkBean linkBean = new LinkBean();
        linkBean.setURL(urlString);
        for (URL link : linkBean.getLinks()) {
            if (result.add(link.toString())) { // add() returns false for duplicates
                out.write(link.toString() + '\n');
                countLinks++;
            }
        }
        out.close();
        System.out.println(countLinks);
    }

    // Fetches every URL listed in the given file, one at a time, and writes
    // the links found on those pages to links1.xml (the second crawl level)
    private void readLinksFromFile(String filename) throws Exception {
        BufferedWriter out = new BufferedWriter(new FileWriter("links1.xml"));
        BufferedReader br = new BufferedReader(new FileReader(filename));
        LinkBean linkBean = new LinkBean();
        String strLine;
        while ((strLine = br.readLine()) != null) {
            linkBean.setURL(strLine); // the next page is not fetched until this one finishes
            for (URL link : linkBean.getLinks()) {
                if (result.add(link.toString())) {
                    out.write(link.toString() + '\n');
                    countLinks++;
                }
            }
        }
        out.close();
        br.close();
        System.out.println(countLinks);
    }

    // Downloads the start page verbatim into content.xml
    private void writePageToFile() throws Exception {
        URL url = new URL(urlString);
        BufferedWriter out = new BufferedWriter(new FileWriter("content.xml"));
        BufferedReader buffReader = new BufferedReader(new InputStreamReader(url.openStream()));
        String inputLine;
        while ((inputLine = buffReader.readLine()) != null) {
            out.write(inputLine + '\n');
        }
        buffReader.close();
        out.close();
    }

    // Counts occurrences of a search string in content.xml, then deletes the file.
    // Scanning with indexOf() replaces the old char-by-char state machine, which
    // lost matches after a partial one (searching "aab" in "aaab" found nothing)
    private void countWords() throws Exception {
        System.out.print("Enter the search text: ");
        BufferedReader klaw = new BufferedReader(new InputStreamReader(System.in));
        String searchString = klaw.readLine();

        StringBuilder content = new StringBuilder();
        BufferedReader in = new BufferedReader(new FileReader("content.xml"));
        String line;
        while ((line = in.readLine()) != null) {
            content.append(line).append('\n');
        }
        in.close();

        int countWords = 0;
        if (!searchString.isEmpty()) {
            int index = content.indexOf(searchString);
            while (index != -1) {
                countWords++;
                // advance past the match: counts non-overlapping occurrences,
                // same as the original code
                index = content.indexOf(searchString, index + searchString.length());
            }
        }
        System.out.println(countWords);

        new File("content.xml").delete();
    }
}
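As for the speed: those 12 minutes are almost entirely network wait, not CPU. The loop in readLinksFromFile() downloads and parses the 300 pages strictly one after another, so the total time is the sum of every round trip. The usual fix is to fetch several pages in parallel. Below is a rough sketch of that idea using a fixed thread pool; the class name ParallelLinkExtract, the pool size of 20, and the Java 8+ APIs are my own choices, not anything from the original code, so treat it as a starting point rather than a drop-in replacement.

import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.htmlparser.beans.LinkBean;

public class ParallelLinkExtract {

    public static void main(String[] args) throws Exception {
        // The URLs collected from the start page, one per line
        List<String> urls = Files.readAllLines(Paths.get("links.xml"));

        // Thread-safe set so the workers can deduplicate concurrently
        Set<String> seen = ConcurrentHashMap.newKeySet();

        // 20 workers is a guess; tune it to your bandwidth, and keep it
        // modest so you don't hammer a single host
        ExecutorService pool = Executors.newFixedThreadPool(20);

        for (String url : urls) {
            pool.submit(() -> {
                try {
                    // One LinkBean per task, since it is not documented as thread-safe
                    LinkBean bean = new LinkBean();
                    bean.setURL(url);
                    for (URL link : bean.getLinks()) {
                        seen.add(link.toString());
                    }
                } catch (Exception e) {
                    System.err.println("Failed: " + url + " (" + e.getMessage() + ")");
                }
            });
        }

        pool.shutdown();
        pool.awaitTermination(1, TimeUnit.HOURS);

        Files.write(Paths.get("links1.xml"), seen);
        System.out.println(seen.size());
    }
}

With 20 workers the wall-clock time is roughly the longest twentieth of the downloads instead of their sum, so in the ideal case the 12 minutes should shrink to well under a minute; slow or dead hosts will still dominate, so adding a connection timeout is the next thing worth doing.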