Top stories

    United States

    World

    Business

    Technology

    Sports

    Science

    Arts

    Health

    Style

    Hobby

    Scraper
    news
    /** Maximum length, in UTF-16 code units, kept for short fields (titles, authors, URLs, tags). */
    const maxField = 8192
    /** Maximum length, in UTF-16 code units, kept for an item's summary. */
    const maxSummary = 16384
    /** Maximum length, in UTF-16 code units, kept for an item's description. */
    const maxDescription = 16384

    /**
     * Truncates `x` to at most `length` UTF-16 code units.
     *
     * Falsy input (`undefined` or the empty string) and strings that already fit
     * are returned unchanged. When the cut would fall between the two halves of a
     * surrogate pair, one more code unit is dropped so the result never ends in a
     * lone high surrogate.
     *
     * @param x - The string to bound; may be undefined.
     * @param length - The maximum number of UTF-16 code units to keep.
     * @returns The bounded string, or `x` itself when it is falsy or short enough.
     */
    const boundedString = (x: string | undefined, length: number) => {
      if (!x || x.length <= length) return x
      let end = length
      if (end > 0) {
        const before = x.charCodeAt(end - 1)
        const after = x.charCodeAt(end)
        const splitsPair =
          before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff
        // Back off by one unit rather than keep half of an astral character.
        if (splitsPair) end -= 1
      }
      return x.substring(0, end)
    }

    /**
     * Converts one parsed feed item into a news record and hands it to
     * `context.functions.prepare`.
     *
     * String fields taken from the item are bounded with `boundedString`. The
     * first video, audio and image enclosures provide `videoUrl`, `audioUrl` and
     * (when the item has no image of its own) `imageUrl`. If there is still no
     * image, the description and then the summary are scanned with
     * `scanHtmlForImage`.
     *
     * Side effect: when `context.options.props` exists, its missing `name`,
     * `description`, `imageUrl` and `site` entries are filled in from the feed's
     * `meta`. This happens before `feedName` / `feedImageUrl` are copied onto the
     * item, so the item already carries the backfilled values.
     *
     * @param url - The feed URL, stored on the record as `feed`.
     * @param item - The parsed feed item.
     * @param context - Scraper context supplying `options` and `functions`.
     * @returns Whatever `context.functions.prepare(news, context.options)` returns.
     * @throws Error when `context.functions` is missing.
     */
    function scraper(
      url: string,
      item: FeedParser.Item,
      context: ScraperContext,
    ) {
      const meta: { [key: string]: any } = item.meta
      let locale = item.language
      if (locale) locale = locale.toLowerCase()

      if (context.options?.debugRSS) {
        console.log('FeedParser.Item:')
        console.log(item)
      }

      const props: Props = {}
      if (item.title) props.title = boundedString(item.title, maxField)
      if (item.author) props.author = boundedString(item.author, maxField)
      if (item.summary) props.summary = boundedString(item.summary, maxSummary)
      if (item.description) props.description = boundedString(item.description, maxDescription)
      if (item.image?.title) props.imageTitle = boundedString(item.image.title, maxField)
      if (item.image?.url) props.imageUrl = boundedString(item.image.url, maxField)
      if (item.comments) props.commentsUrl = boundedString(item.comments, maxField)

      if (item.enclosures) {
        // Only the first enclosure of each media kind is kept; an enclosure image
        // never overrides an image the item already declared.
        for (const enclosure of item.enclosures) {
          if (enclosure.url && enclosure.type?.startsWith('video') && !props.videoUrl) {
            props.videoUrl = boundedString(enclosure.url, maxField)
          } else if (enclosure.url && enclosure.type?.startsWith('audio') && !props.audioUrl) {
            props.audioUrl = boundedString(enclosure.url, maxField)
          } else if (enclosure.url && enclosure.type?.startsWith('image') && !props.imageUrl) {
            props.imageUrl = boundedString(enclosure.url, maxField)
          }
        }
      }

      const news = {
        date: item.pubdate || item.date || new Date(),
        // Fall back to a media URL when the item has no link of its own.
        link: boundedString(item.link ?? props.videoUrl ?? props.audioUrl, maxField),
        feed: boundedString(url, maxField),
        guid: boundedString(item.guid || item.link, maxField) || '',
        props,
        tags: {
          ...context.options?.tags,
          // Tags configured in the options take precedence over the item's language.
          locale: boundedString(context.options?.tags?.locale || locale, maxField),
          category: boundedString(context.options?.tags?.category || '', maxField),
        },
      }

      if ((item.categories?.length ?? 0) > 0) {
        news.tags.topics = [ ...item.categories ]
      }

      // Last resort for an image: look inside the HTML of the description, then the summary.
      if (!news.props.imageUrl) {
        news.props.imageUrl = scanHtmlForImage(news.props.description || '')
      }
      if (!news.props.imageUrl) {
        news.props.imageUrl = scanHtmlForImage(news.props.summary || '')
      }
      if (!news.props.imageUrl) delete news.props.imageUrl

      // Backfill feed-level props from the feed metadata first, so the values
      // copied onto this item below are the same ones later items will see.
      const feedProps = context.options?.props
      if (feedProps) {
        if (!feedProps.name && meta?.title) feedProps.name = meta.title
        if (!feedProps.description && meta?.description) feedProps.description = meta.description
        if (!feedProps.imageUrl && meta?.image?.url) feedProps.imageUrl = meta.image.url
        if (!feedProps.site && meta?.link) feedProps.site = meta.link

        if (feedProps.imageUrl) news.props.feedImageUrl = feedProps.imageUrl
        if (feedProps.name) news.props.feedName = feedProps.name
      }

      if (!context.functions) throw new Error('Expected context functions')
      return context.functions.prepare(news, context.options)
    }