KDD 2009 Annotated Abstracts Notes
Jump to navigation
Jump to search
See: KDD-2009 Annotated Abstracts.
extractCvineInfo.sh
#!/bin/bash
# extractCvineInfo.sh
# Usage: ./extractCvineInfo.sh >& cvine.tab
#
# Scans the files named 4??? and 5??? in the parent directory and prints one
# tab-separated record per file:
#   file <TAB> Track <TAB> trackG <TAB> authors <TAB> title <TAB> abstract
#
# To test on a single file, use this loop header instead:
#for file in `(cd ..; /bin/ls 4900)`
for file in `(cd ..; /bin/ls 4??? 5???)`
do
    #echo $file
    fileLoc="../$file"
    lines=`wc -l "$fileLoc" | awk '{print $1}'`

    # GET THE TALK TYPE {S, P, W, D, T}
    # lineTrack is the line number of the ">Track:<" line; Track is the second
    # whitespace-separated word on that line with its first ':' removed.
    lineTrack=`grep -n ">Track:<" "$fileLoc" | sed "s/:/ /" | awk '{print $1}'`
    Track=`grep ">Track:<" "$fileLoc" | awk '{print $2}' | sed "s/://"`
    # trackG keeps everything up to and including the first S/P/W/D/T letter.
    trackG=`echo $Track | perl -ne 'chomp; s/([SPWDT]).*/$1/g; print $_'`

    # GET THE TITLE
    # First <h2> line that does not mention "Comment", with the h2 tags removed.
    lineTitle=`grep -n "<h2>" "$fileLoc" | grep -v "Comment" | head -1 | sed "s/:/ /" | awk '{print $1}'`
    title=`grep "<h2>" "$fileLoc" | grep -v "Comment" | head -1 | sed "s/<h2>//" | sed "s/<\/h2>//"`

    # GET THE AUTHORS
    # The author links are the href lines between the first "Speaker" line and
    # the Track line. Each line has its first tag removed and everything from
    # the next '<' replaced by a comma; the lines are joined, whitespace is
    # collapsed, and the trailing comma is dropped.
    lineAuthors=`grep -n "Speaker" "$fileLoc" | grep -v "Comment" | head -1 | sed "s/:/ /" | awk '{print $1}'`
    authLines=`echo $lineTrack - $lineAuthors | bc`
    authors=`head -$lineTrack "$fileLoc" | tail -$authLines | grep href | perl -ne 'chomp; s/<.*?>//; s/<.*/,/; print' | perl -ne 's/\s+/ /g; s/,[ ]*?$//; print'`

    # GET THE ABSTRACT
    # get the abstract's start line (add 1)
    # lineAbstract1=`grep -n class=\"body $fileLoc | sed "s/:/ /" | awk '{print $1}' | perl -ne 'chomp; print 1+$_'`
    lineAbstract1=`grep -n class=\"body "$fileLoc" | sed "s/:/ /" | awk '{print $1}'`
    # Number of lines that follow the class="body line.
    lineAbstract1r=`echo $lines - $lineAbstract1 | bc`
    # Offset (within that tail) of the first line containing "clear".
    lineAbstract2_=`tail -$lineAbstract1r "$fileLoc" | grep -n "clear" | sed "s/:/ /" | awk '{print $1}' | head -1`
    # Flatten the abstract onto one line: newlines become <BR>, <p> tags become
    # spaces, everything from the first </div is cut, and a trailing <BR> is
    # removed before whitespace is collapsed.
    abstract=`tail -$lineAbstract1r "$fileLoc" | head -$lineAbstract2_ | perl -ne 's/^\s+//; s/\n+/<BR>/g; s/<p>|<\/p>/ /g; print' | perl -ne 's/<\/div.*//g; s/\s*<BR>\s*$//; s/\s+/ /g; print'`
    # abstract=`head -$lineAbstract $fileLoc | tail -1 | sed "s/<p>//" | sed "s/<\/p>//" | sed "s/\s+/ /g"`

    #debug
    #echo "1[$fileLoc|$lineTrack|$trackG]"
    #echo "2[$lineTitle|$title]"
    #echo "3[$lineTrack|$lineAuthors|$authLines]"
    #head -$lineTrack $fileLoc | tail -$authLines | grep href
    #echo "5[$lineAuthors|$authLines|$authors]"

    # STORE
    # The separator must be a real TAB: the output is redirected to a .tab file.
    tab=$'\t'
    echo "$file$tab$Track$tab$trackG$tab$authors$tab$title$tab$abstract"
done
createWikiFiles.pl
#!/usr/bin/perl -w
# createWikiFiles.pl
# cat Combined4a.tab | ./createWikiFiles.pl > summaryPage
use strict ;
sub trim($);
# Main loop: every input line is one tab-separated paper record.  For each
# record this prints a wiki citation line (followed by a "----" rule) to
# STDOUT and writes a stub page file named 2009_<CompressedTitle> into the
# current directory.
while (my $line = <>) {
    #print "----\n$line";
    my @w = split(/\t/, $line);

    # Columns used: 0 = title, 2 = DOI, 5 = comma-separated authors, 6 = abstract.
    my $title    = trim($w[0]);
    my $doi      = trim($w[2]);
    my $authors  = trim($w[5]);
    my $abstract = trim($w[6]);

    # Page-name key: punctuation to spaces, capitalise each word, drop all
    # whitespace, then keep at most the first 26 characters.
    my $comprTitl = $title;
    $comprTitl =~ s/[\ \.\:\;\-\"\']/ /g;
    $comprTitl =~ s/(\w+)/\u$1/g;
    $comprTitl =~ s/\s+//g;
    $comprTitl = substr($comprTitl, 0, 26);

    my @authors     = split(',', $authors);
    my $authorsItem = '';    # display form: "A, B, and C"
    my @authorLnames;        # one last name per author, in input order

    for my $i (0 .. $#authors) {
        my $author = $authors[$i];
        $author =~ s/^ +//;

        $authorsItem .= " " if $i > 0;
        $authorsItem .= $author;
        $authorsItem .= "," if $i < $#authors;
        # " and" goes after the second-to-last author.
        $authorsItem .= " and" if $i + 1 == $#authors;

        # decompose the name
        my @authorName = split(/\s/, $author);
        #print "authorName $#authorName @authorName\n";
        my $authorLname;
        if ($#authorName == 1) {
            # Exactly two words: the second one is the last name.
            $authorLname = $authorName[1];
        }
        elsif ($author =~ /\./) {
            # Initials present: the last name is whatever follows the final ". ".
            $authorLname = $author;
            $authorLname =~ s/.*\. *//;
        }
        else {
            $authorLname = $authorName[$#authorName];
        }
        push(@authorLnames, $authorLname);
        #print "i=$i, [$author] [$authorLname]\n";
    }
    #print "$#authorLnames : @authorLnames\n";

    # Create the author abbreviation: "A", "A & B", or "A & al".
    # The subscripts must touch the array name; "$authorLnames [0]" would
    # interpolate a scalar $authorLnames instead of the array element.
    my $abbrAuthors;
    if ($#authorLnames == 0) {
        $abbrAuthors = "$authorLnames[0]";
    }
    elsif ($#authorLnames == 1) {
        $abbrAuthors = "$authorLnames[0] & $authorLnames[1]";
    }
    else {
        $abbrAuthors = "$authorLnames[0] & al";
    }

    # The same citation line goes to the summary page and to the page file.
    my $citation = "* ([[2009_$comprTitl|$abbrAuthors, 2009]]) ⇒ $authorsItem\. ([[2009]]). \"''$title\.''\" In: [[Proceedings of the 15th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining]] ([[KDD-2009]]). [http://dx.doi.org/$doi doi:$doi]\n";

    print $citation;
    #print "<B> $abstract\n";
    print "----\n\n";

    my $outfile = "2009_$comprTitl";    # print "$outfile\n";
    # Three-argument open with a lexical handle: the file name is never
    # parsed for a mode, and the handle cannot clash with other code.
    open(my $out, '>', $outfile) or die "Can't open $outfile : $!";
    print {$out} $citation;
    print {$out} "----\n<B>Subject Headings:</B>\n----\n==Notes\n\n\n----\n==Quotes\n\n===Abstract\n";
    print {$out} "* $abstract\n";
    print {$out} "\n\n----\n";
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Can't close $outfile : $!";
}
# Perl trim function to remove whitespace from the start and end of the string.
#   $string - the text to clean up; an undefined value is treated as ''.
# Returns the trimmed copy; the caller's variable is left untouched.
sub trim($)
{
    my $string = shift;
    # A missing column arrives as undef; treat it as empty rather than
    # running a substitution on an undefined value.
    $string = '' unless defined $string;
    $string =~ s/^\s+|\s+$//g;
    return $string;
}
putUpOnWiki.sh
#!/bin/bash
# ./putUpOnWiki.sh
#
# For every file in the current directory whose name starts with "2009_",
# echo the name and pipe the file's contents into ./wikiput.pl, passing the
# destination URL built from the file name.
for file in `/bin/ls 2009_*`
do
    echo $file
    dest='http://www.gabormelli.com/rkb_080709.cgi?'$file
    # NOTE(review): -s presumably sets the edit summary -- confirm against wikiput.pl.
    cat "$file" | ./wikiput.pl -s "putUpOnWiki 090724" "$dest" &
    # Wait in the foreground so successive uploads are spaced five seconds
    # apart; a backgrounded sleep returns immediately and paces nothing.
    sleep 5
done