###############################################################################
# Local Security Check Automation Framework
#
# Authors:
# Veerendra GG <veerendragg@secpod.com>
#
# Revision 1.0
# Date: 2009/02/13
#
# Copyright:
# Copyright (c) 2009 SecPod , http://www.secpod.org
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
# (or any later version), as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
###############################################################################

import re
import os
import sys
import string

from common import utils


## CentOS releases this parser generates code for.  Each value is the
## string gather-package-list.nasl uses when it sets "ssh/login/release".
## 'CentOS 1' is intentionally absent: it is not present in gatherpkglist
## and is also very old.
os_map = {
    'CentOS 2': 'CentOS2',
    'CentOS 3': 'CentOS3',
    'CentOS 4': 'CentOS4',
    'CentOS 5': 'CentOS5',
}

## Package file suffixes that are stripped from rpm names
strip_val = [
    '.i586.rpm',
    '.x86_64.rpm',
    '.noarch.rpm',
    '.i386.rpm',
    '.i686.rpm',
    '.src.rpm',
    '.i386',
]

## Base URL of the centos-announce archive; advisory links are built on it
append_url = 'http://lists.centos.org/pipermail/centos-announce/'

## Entries listed here are not advisories
skip_list = ['']


class Parser:
    """
    CentOS security advisory parser.

    parser() takes the HTML of one CentOS advisory and fills in the
    attributes declared below; fetchHTML() collects the advisory links of
    a year and hands the ones not yet cached to utils.fetchFiles().

    NOTE(review): fetchHTML() also reads self.main_url and self.html_cache,
    which are not declared in this class -- presumably the caller sets them
    on the instance; confirm against the calling code.
    """

    ## Global parse structure, initializing.
    ## These are class-level defaults; the methods rebind them on the
    ## instance (self.X = ...) rather than mutating them in place.

    # Advisory ID found by getAdvID()
    AdvID = ''
    # Formatted description returned by getDescription()
    Description = ''
    # Result of getRPM()
    Packages = {}
    # CVE/CAN names joined into one string by getCVE()
    CVEs = ''
    # Plugin name assembled at the end of parser()
    Name = ''
    # Set to the same value as Product by parser()
    Summary = ''
    # Result of getAffectedProduct(); parser() then keeps only its first entry
    Platforms = ''
    # Affected package name returned by getAffectedPackage()
    Product = []
    # Advisory HTML with '\r\n' normalised to '\n', stored by parser()
    Html_content = ''
    # [name, value] pair obtained by splitting AdvID on '-' in parser()
    XREF = []
    # File name returned by getFileName()
    FileName = ''
    # Advisory ID -> link map produced by _mapAdvIdLink() in fetchHTML()
    adv_id_link_map = {}


    def _mapAdvIdLink(self, year, debug=0):
        """
        Build a map of Red Hat advisory IDs to their announcement links.

        The rhsa-announce archive index is searched for the thread pages of
        the given year and of the year before it.  Every entry on those
        pages that matches 'href="msg...html ... [RHSA...]' is recorded.

        year  -- year to look up (must be accepted by int())
        debug -- kept for signature compatibility; not used here

        Returns a dict {advisory id: link}.  The dict is empty when no
        thread page or no advisory entry was found, so callers can keep
        testing the result for truth.
        """
        rhsa_url = 'https://www.redhat.com/archives/rhsa-announce/'
        entry_regex = re.compile(r'href="(msg.*.html).*\[(RHSA.*)\]')

        ## Thread pages of the requested year first, then the previous year
        thread_links = []
        for wanted_year in (year, str(int(year) - 1)):
            links = self._getYearLinks(rhsa_url, wanted_year, rhsa_url)
            if links:
                thread_links.extend(links)

        if not thread_links:
            print("No links found")
            return {}

        id_link_map = {}
        for thread_link in thread_links:
            page = utils.getHTMLCon(thread_link)
            if not page:
                ## No content came back for this page, nothing to scan
                continue

            ## The directory part of the thread link is the <year-month>
            ## folder in which the individual messages live
            year_month = os.path.basename(os.path.split(thread_link)[0])
            for msg_file, adv_id in entry_regex.findall(page):
                id_link_map[adv_id] = rhsa_url + year_month + '/' + msg_file

        return id_link_map


    def _getYearLinks(self, link, year, append_url, debug=0):
        """
        Collect the thread-page links of one year from an archive index.

        link       -- URL of the archive index page to download
        year       -- year whose '<year>...thread.html' links are wanted
        append_url -- prefix put in front of every link that was found
        debug      -- when true, print the resulting links

        Returns the list of prefixed links; the list is empty when the
        page contains no link for that year.
        """
        page = utils.getHTMLCon(link)
        pattern = 'href="(' + str(year) + '.*thread.html)'
        year_links = [append_url + found
                      for found in re.findall(pattern, page)]

        if debug and year_links:
            print("\nCentOS Advisory Links for (%s) year" % year)
            for year_link in year_links:
                print(year_link)

        return year_links


    def _getEachAdvLink(self, link, debug=0):
        """
        Get the security advisory links listed on one thread page.

        Only entries mentioning i386 and "security" are collected; entries
        that mention one of the skipped architectures are dropped.

        link  -- URL of a thread page; its directory name is used as the
                 <year-month> part of the returned links
        debug -- when true, print skipped entries and a summary

        Returns a list of advisory links built on the module-level
        append_url, or an empty list when nothing was found.
        """
        ## Entries mentioning any of these are skipped
        skip_archs = ('s390', 'ia64', 's490')

        data = utils.getHTMLCon(link)
        if not data:
            ## No content came back for this page, nothing to scan
            return []

        ## Presently x86_64 platforms are not supported.
        ## NOTE(review): "[CentOS-announce]" is not escaped, so the regex
        ## engine reads it as a character class (a single character) and
        ## not as the literal list tag -- confirm the intent before
        ## tightening the pattern.
        all_links = re.findall(
            'HREF=".*.html">[CentOS-announce].*i386.*\\n?.*security.*', data)
        ## To generate plugins for x86_64 platforms use this pattern instead:
        ##   'HREF=".*.html">[CentOS-announce].*\\n?.*security.*'

        links = []
        for line in all_links:
            if [arch for arch in skip_archs if arch in line]:
                if debug:
                    print("\nSkipped advisory :  %s" % line)
                continue

            tmp = re.findall('HREF="(.*.html)', line, re.IGNORECASE)
            if tmp:
                links.append(tmp[0])

        if not links:
            return []

        ## The directory part of the thread link is the <year-month> folder
        year_month = os.path.basename(os.path.split(link)[0])
        month_links = [append_url + year_month + '/' + name
                       for name in links]

        if debug:
            print("######################################################")
            print("\nCentOS Security Advisories for (%s) year-month"
                  % year_month)
            print("Total (%s) CentOS Security Advisories : "
                  % len(month_links))
            print("######################################################")

        return month_links

    def fetchHTML(self, year, debug=0):
        """
        Retrieve the CentOS security advisories of a year locally.

        Steps:
          1. build self.adv_id_link_map with _mapAdvIdLink()
          2. collect the thread pages of the year from self.main_url
          3. collect the advisory links from every thread page
          4. pass every advisory whose cache file does not exist yet to
             utils.fetchFiles()

        The cache file name is self.html_cache + '<year-month>_<file>',
        taken from the last two path components of the advisory link.
        self.main_url and self.html_cache must be set on the instance
        before this method is called.

        year  -- year to fetch
        debug -- when true, print progress information

        The process exits with status 0 after printing an error when the
        ID map, the year or the advisories cannot be found.  Any other
        exception is reported and passed to sys.exit().  A failure to
        fetch a single advisory is only printed.
        """
        try:
            self.adv_id_link_map = self._mapAdvIdLink(year, debug)
            if not self.adv_id_link_map:
                print("ERROR: Not able to create required dict")
                print("Exiting ...")
                sys.exit(0)

            year_links = self._getYearLinks(self.main_url, year,
                                            append_url, debug)
            if not year_links:
                ## One literal formatted as a whole: the original applied
                ## '%' to the second half only, which raised a TypeError
                print("ERROR: Din't find mentioned (%s) year in CentOS "
                      "Advisories..." % (year,))
                print("Exiting ...")
                sys.exit(0)

            all_adv_links = []
            for link in year_links:
                month_links = self._getEachAdvLink(link, debug)
                if month_links:
                    all_adv_links.extend(month_links)
                elif debug:
                    print("No CentOS Security Advisories for : \n%s" % link)

            if not all_adv_links:
                print("ERROR: Din't find any CentOS Security "
                      "Advisories... %s" % (year,))
                print("Exiting ...")
                sys.exit(0)

            all_adv_links = utils.removeDups(all_adv_links)

            for adv_url in all_adv_links:
                ## .../<year-month>/<file> -> <html_cache><year-month>_<file>
                url_parts = adv_url.split('/')
                file_name = self.html_cache + url_parts[-2] + '_' + \
                                                            url_parts[-1]

                if os.path.isfile(file_name):
                    ## Already cached
                    continue

                if debug:
                    print("\nFetching CentOS Advisory..." +
                          os.path.basename(adv_url))
                try:
                    utils.fetchFiles(adv_url, file_name, debug)
                except Exception:
                    ## Best effort: report and go on with the next one
                    msg = sys.exc_info()[1]
                    print('ERROR: Error fething the url %s' % msg)

        except Exception:
            msg = sys.exc_info()[1]
            print("Exception in : redhat -> Parser(Class) -> fetchHTML method()")
            sys.exit(msg)


    def _findAll(self, regex):
        """
        Returns Matched data
        """
        return regex.findall(self.Html_content)


    def getCVE(self, redhat_adv_con, debug=0):
        """
        Extract the CVE-... and CAN-... names from Red Hat advisory text.

        redhat_adv_con -- advisory text to search
        debug          -- when true, print progress and the result

        The names are passed through utils.removeDups() and joined with
        '", "'.  Returns that string, or '' when no name was found.
        """
        if debug:
            print("\nGetting CVE List...")

        ## CVE names are collected first, CAN names after them
        names = []
        for pattern in ('CVE-[0-9]+-[0-9]+', 'CAN-[0-9]+-[0-9]+'):
            found = re.findall(pattern, redhat_adv_con)
            if found:
                names.extend(found)

        names = utils.removeDups(names)

        cve_list = ''
        if names:
            cve_list = '", "'.join(names)

        if debug:
            print("CVE List :  %s" % cve_list)

        return cve_list


    def getAdvID(self, debug=0):
        """
        Returns CentOS Security Advisory ID
        """

        if debug:
            print "\nGetting Advisory ID..."

        adv_id_regex =  re.compile('<H1>.*(CESA-\d+:\d+-?\d+)')
        adv_id = self._findAll(adv_id_regex)

        if not adv_id:
            return ''

        if debug:
            print "Advisory ID : ", adv_id

        return adv_id[0].strip()


    def getAffectedPackage(self, debug=0):
        """
        Returns Affected Packages/RPM's
        """

        if debug:
            print "\nGetting Affected Packages/RPM List..."

        pkg =  re.findall("RHSA.*:\s?(.*) security update", self.Html_content)

        if not pkg:
            pkg =  re.findall(".*\.rpm", self.Html_content)
            if pkg:
                pkg = pkg[0]
                pkg = os.path.split(pkg)[-1]
                tmp = re.findall('(?<=-|_)\d+\.\d+.*', pkg)
                if not tmp:
                    tmp = re.findall('(?<=-|_)(\d.*)', pkg)

                if tmp:
                    pkg = pkg.replace(tmp[0], '')
        else:
            pkg = pkg[0]

        pkg = pkg.strip('-')
        pkg = pkg.strip()

        if not pkg:
            pkg = ''

        if debug:
            print "Affected Packages/RPMS : ", pkg

        return pkg


    def getDescription(self, redhat_adv_con, debug=0):
        """
        Returns Vulnerability Description
        """
        description = ''

        if debug:
            print "\nGetting Vulnerability Description..."

        desc =  re.findall("(?s)Description:\s+(.*)\n.*Solution", \
                                     redhat_adv_con, re.IGNORECASE)

        if desc:
            desc = desc[0].strip()

            ## Formatting the description
            for i in desc.split('\n'):
                description += '  ' + i + '\n'

            description = description.replace('"'," &qt ")
        else:
            description = ''

        return description


    def getAffectedProduct(self, debug=0):
        """
        Find the CentOS releases named in the advisory heading.

        'CentOS <number>' is looked for on lines containing '<H1>'.  Only
        names that are keys of os_map are kept.

        debug -- when true, print what was found and what was left out

        Returns the list of supported release names; it is empty when
        nothing was found or nothing is supported.
        """
        ## Get Affected Product/Platform
        found = self._findAll(re.compile(r"<H1>.*(CentOS \d+)"))
        if not found:
            return []

        if debug:
            print("\nAffected Product is/are : (%s)" % (found,))

        ## Don't include Product/Platform, If not in "os_map" Dict
        supported = []
        for name in found:
            name = name.strip()
            if name in os_map:
                supported.append(name)
            elif debug and name:
                print("\nUPDATE: Not Generating Code for (%s) OS" % name)
                print("If Needed to generate code, then "
                      "add into dict variable os_map in parser")

            if supported and debug:
                print("\nGenerating Code for (%s) Products " % (supported,))

        return supported


    def getRPM(self, prod_list, debug=0):
        """
        Returns OS Package Dictionary

        All '*.rpm' entries of self.Html_content are collected (or the
        '*.i386' entries when there is no '.rpm' one).  Only file names
        containing one of the strip_val suffixes are kept.  They are
        passed through utils.stripIt() and utils.removeDups().

        prod_list -- platform names; those that are keys of os_map get an
                     entry in the result
        debug     -- when true, print progress and the result

        Returns a dict {os_map value: package list}.  An empty list is
        returned instead when no rpm entry was found or none was kept.
        """
        if debug:
            print("\nGetting RPM List...")

        rpms = re.findall(r".*\.rpm", self.Html_content)
        if not rpms:
            ## Exception for some advisories(dont have .rpm at the end).
            rpms = re.findall(r".*\.i386", self.Html_content)
            if not rpms:
                print("No RPMS found")
                return []

        rpm_list = []
        for rpm_path in rpms:
            rpm = os.path.split(rpm_path)[-1]
            for suffix in strip_val:
                if suffix in rpm:
                    ## Keep the rpm once, even when several suffixes match
                    ## (e.g. '.i386.rpm' also contains '.i386')
                    rpm_list.append(rpm)
                    break
            else:
                if debug and ".rpm" in rpm:
                    print("Found rpm other then, %s :: %s"
                          % (', '.join(strip_val), rpm))

        if not rpm_list:
            if debug:
                print("\nERROR: RPMs not found for Product  %s"
                      % (prod_list,))
            return []

        rpm_list = utils.stripIt(rpm_list, strip_val)
        rpm_list = utils.removeDups(rpm_list)

        ## Every supported platform gets the same package list
        os_pkg_dict = {}
        for platform in prod_list:
            if platform in os_map:
                os_pkg_dict[os_map[platform]] = rpm_list

        if debug:
            print("OS PKG Dict :  %s" % (os_pkg_dict,))

        return os_pkg_dict


    def formatReference(self, main_url, file_name, debug=0):
       """
       Constructs a reference for advisory
       """
       if not main_url.endswith('/'):
           main_url = main_url + '/'

       reference = main_url + '/'.join(file_name.split('_'))

       return reference


    def getRedhatAdvCon(self, adv_id, debug=0):
        """
        Returns html content from redhat advisory for centos
        """
        link = ''

        adv_id = adv_id.replace('CESA', 'RHSA')

        if self.adv_id_link_map.has_key(adv_id):
            link = self.adv_id_link_map[adv_id]
        else:
            for i in self.adv_id_link_map.keys():
                if adv_id in i:
                    link = self.adv_id_link_map[i]
                    break
                adv_id1 = adv_id.split('-')
                if len(adv_id1) > 2:
                    adv_id1 = '-'.join(adv_id1[:2])
                    if adv_id1 in i:
                        link = self.adv_id_link_map[i]
                        break
        if not link:
            if debug:
                print "ERROR: link to get description for %s id not found" \
                                                                   %(adv_id)
                return ''

        if debug:
            print "Redhat advisory link (%s) for (%s) centos advisory" \
                                                         %(link, adv_id)

        data = utils.getHTMLCon(link)
        if not data:
            if debug:
                print "Din't get the content for redhat advisory : ", link

        return data


    def getFileName(self, adv_id, prod, platform, debug=0):
        """
        Build the file name for an advisory.

        adv_id   -- advisory id; ':' is replaced with '_'
        prod     -- package name
        platform -- key of os_map; its value is used in lower case
        debug    -- when true, report a missing architecture

        The architecture is the first of i586, x86_64, i386, i686 that
        occurs in the first '<H1>...</H1>' heading of self.Html_content.

        Returns the tuple (file name, platform, architecture).  The file
        name is the id, package and platform joined with '_', followed by
        the architecture when one was found ('' otherwise).
        """
        platform = os_map[platform].lower()

        arch = ''
        headings = re.findall(r'<H1>.*\n?.*</H1>', self.Html_content)
        if headings:
            heading = headings[0]
            for candidate in ['i586', 'x86_64', 'i386', 'i686']:
                if candidate in heading:
                    arch = candidate
                    break

        name_parts = [adv_id.replace(':', '_'), prod, platform]
        if arch:
            name_parts.append(arch)
        elif debug:
            print("ERROR: Din't find archetecture for :  %s" % adv_id)

        return ("_".join(name_parts), platform, arch)



    def parser(self, html_content, debug=0):
        """
        Main parser function, builds the parser object
        by invoking parse functions
        """

        try:
            if debug:
                print "CentOS Parser Initiated..."

            self.Html_content = html_content.replace('\r\n', '\n')

            self.AdvID = self.getAdvID(debug)
            if not self.AdvID or self.AdvID == '':
                if debug:
                    print "\nERROR: Advisory ID not found..."
                return False

            redhat_adv_con = self.getRedhatAdvCon(self.AdvID, debug)
            if not redhat_adv_con:
                if debug:
                    print "\nERROR: Din't get Redhat Advisory content for "+ \
                                          "CentOD (%s) Adv ID ...", self.AdvID
                return False

            self.CVEs = self.getCVE(redhat_adv_con, debug)

            self.Platforms = self.getAffectedProduct(debug)
            if not self.Platforms or self.Platforms == []:
                if debug:
                    print "\nERROR: Required Products not found..."
                return False

            self.Packages = self.getRPM(self.Platforms, debug)
            if not self.Packages or self.Packages == '':
                if debug:
                    print "\nERROR: Required RPMS not found..."
                return False

            self.Description = self.getDescription(redhat_adv_con, debug)
            if not self.Description or self.Description == '':
                if debug:
                    print "\nERROR: Description not found..."
                return False

            self.Product = self.getAffectedPackage(debug)
            if not self.Product or self.Product == '':
                if debug:
                    print "\nERROR: Required Package not found..."
                return False

            self.Platforms = self.Platforms[0]

            self.Summary = self.Product

            self.Impact = '  '

            (self.FileName, platform, arch) = self.getFileName(self.AdvID, \
                                        self.Product, self.Platforms, debug)

            ## Construct File Name
            if len(self.AdvID.split('-')) == 2:
                (name, value) = self.AdvID.split('-')
            else:
                (name, value1, value2) = self.AdvID.split('-')
                value = value1 + '-' + value2

            ## Plugin name
            self.Name = self.Product + " " + self.AdvID + " " + \
                                            platform + " " + arch 

            ## Set XREF
            self.XREF = [name, value]

            if debug:
                print "\nAll mandatory attributes are parsed: ", self.AdvID

            return True


        except Exception, msg:
            print 'Exception in Parser redhat -> Parser -> parser() Method'
            sys.exit(msg)
